def count_reproducible_method_in_year(year):
    """Count the core detection papers of ``year`` by method reproducibility.

    Only records whose ``main_objective`` contains "detection" and whose
    ``julio_state`` contains "integrated_core" are taken into account.

    Returns a two-element list ``[yes_count, no_count]``: the number of
    such records whose ``reproducible_method`` contains "yes" and "no"
    respectively.
    """
    counts = []
    for answer in ("yes", "no"):
        matching = Biblio.select().where(
            Biblio.reproducible_method.contains(answer)
            & (Biblio.year == year)
            & Biblio.main_objective.contains("detection")
            & Biblio.julio_state.contains("integrated_core"))
        counts.append(matching.count())
    return counts
def return_selection(objective, state):
    """Build a Biblio query filtered by objective and, optionally, state.

    * Falsy ``objective``: every record is selected (``state`` is ignored).
    * ``objective`` only: records whose ``main_objective`` contains it.
    * Both given: additionally ``julio_state`` must contain ``state``.

    Returns the (unevaluated) peewee select query.
    """
    if not objective:
        return Biblio.select()

    objective_matches = Biblio.main_objective.contains(objective)
    if not state:
        return Biblio.select().where(objective_matches)

    state_matches = Biblio.julio_state.contains(state)
    return Biblio.select().where((objective_matches) & (state_matches))
def extract_citations_in_database():
    """Fill ``raw_bibliography`` for every record where it is still NULL.

    For each such record the PDF named ``<bibtex_id>.pdf`` is looked up
    first in the "Corpus/" folder and then in the "Papers/" folder under
    the hard-coded base directory. The references extracted from the first
    file found are stored through ``update_raw_bibliography``. When no file
    exists, the marker "NOT WORKED" is stored instead; when the extraction
    raises ``TypeError`` or ``IndexError``, the marker
    "SOME ERROR OF THE PROGRAM" is stored.
    """
    selection = Biblio.select(Biblio.bibtex_id).where(
        Biblio.raw_bibliography.is_null())
    global_path = "/Users/Julio/Documents/PhD/Papers/Security/Multi-Step attacks DB/"
    for element in selection:
        try:
            string_with_references = "NOT WORKED"
            for folder in ("Corpus/", "Papers/"):
                string_of_path = global_path + folder + element.bibtex_id + ".pdf"
                if Path(string_of_path).is_file():
                    string_with_references = extract_citations_from_pdf(
                        string_of_path)
                    break
            else:
                # Runs only when the loop did not ``break``: no PDF found in
                # either folder, so the "NOT WORKED" marker is kept.
                print("NOT WORKED")
            update_raw_bibliography(element.bibtex_id, string_with_references)
        # A tuple is required here: the old ``except TypeError, IndexError``
        # form only caught TypeError and bound it to the name ``IndexError``.
        except (TypeError, IndexError):
            print("Not worked for: " + element.bibtex_id)
            update_raw_bibliography(element.bibtex_id,
                                    "SOME ERROR OF THE PROGRAM")
def make_array_richer(array):
    """Print a paper count next to the second field of every entry.

    For each ``entry`` of ``array`` the function counts the records whose
    ``main_objective`` contains 'detection', whose ``full_author`` contains
    ``entry[0]`` and whose ``julio_state`` contains "integrated", and prints
    that count followed by ``entry[1]``.

    NOTE(review): nothing is ever added to the returned list, so the
    function always returns ``[]`` -- confirm whether the "richer" array
    was meant to be built here.
    """
    for entry in array:
        matching = Biblio.select().where(
            (Biblio.main_objective.contains('detection'))
            & (Biblio.full_author.contains(entry[0]))
            & (Biblio.julio_state.contains("integrated")))
        all_papers = matching.count()
        print(str(all_papers) + " ==== " + str(entry[1]))
    return []
def extract_number_of_papers_per_approach(approaches, begin_year, end_year):
    """Replace each approach's subtype list by its per-year paper counts.

    ``approaches`` is a list of two-element lists ``[label, subtypes]``.
    For every entry and every year in ``range(begin_year, end_year)``
    (``end_year`` excluded) the function counts the records whose
    ``approach`` equals one of the subtypes, whose ``julio_state`` contains
    "integrated_core" and whose ``main_objective`` contains "detection".

    The second element of each entry is then overwritten IN PLACE with a
    list of ``[year, count]`` pairs, and the same ``approaches`` object is
    returned. The grand total is printed.
    """
    total_papers = 0
    for position, approach_entry in enumerate(approaches):
        subtypes = approach_entry[1]
        yearly_counts = []
        for year in range(begin_year, end_year):
            # Add up the matches of every subtype belonging to this approach.
            papers_in_year = sum(
                Biblio.select().where(
                    (Biblio.year == year)
                    & (Biblio.approach == subtype)
                    & (Biblio.julio_state.contains("integrated_core"))
                    & (Biblio.main_objective.contains("detection"))).count()
                for subtype in subtypes)
            yearly_counts.append([year, papers_in_year])
            total_papers += papers_in_year
        # The label stays untouched; only the subtype list is replaced.
        approaches[position][1] = yearly_counts
    print("Total number of papers extracted: " + str(total_papers))
    return approaches
def extract_citation_number_for_the_non_found():
    """Query Scholar for every record that has no citation count yet.

    Only records whose ``citations_google`` is NULL are processed. For each
    of them the quoted title is sent through ``do_query``, the CSV answer is
    split with ``split_citation`` and fields 1, 3, 5 and 6 of the result
    are stored as URL, citation count, cluster id and PDF URL respectively.
    After each record the function waits a random 10-100 seconds, printing
    a countdown.
    """
    selection = Biblio.select(Biblio.title, Biblio.bibtex_id).where(
        Biblio.citations_google.is_null())
    querier.apply_settings(settings)
    query = SearchScholarQuery()
    # The former ``element = selection[0]`` was unused and raised IndexError
    # when nothing was pending; iterating directly handles the empty case.
    for element in selection:
        full_csv = do_query("\"" + element.title + "\"", query)
        print("*******************")
        print("TITLE: " + element.title)
        print(full_csv)
        print("*******************")
        array_of_citation = split_citation(full_csv)
        update_url(element.bibtex_id, array_of_citation[1])
        update_citations_google(element.bibtex_id, array_of_citation[3])
        update_clusterid_google(element.bibtex_id, array_of_citation[5])
        update_url_pdf(element.bibtex_id, array_of_citation[6])
        # Random pause between two queries, shown as a visible countdown.
        waiting_time = randint(10, 100)
        for i in range(waiting_time, 0, -1):
            sys.stdout.write(str(i) + ", ")
            sys.stdout.flush()
            sleep(1)
def do_exist(year, full_author, title):
    """Tell whether a reference is already stored in the database.

    Returns ``True`` only when a record with the same year, title AND
    authors exists. A record sharing just year and title, or just year and
    authors, only triggers a printed warning; the function still returns
    ``False`` so that the caller adds the reference anyway.
    """
    if Biblio.select().where(
            (Biblio.year == year)
            & (Biblio.title == title)
            & (Biblio.full_author == full_author)).exists():
        print("The element already exists: " + str(year) + " " +
              full_author + " - " + title)
        return True

    # peewee expressions must be combined with ``&``: Python's ``and``
    # returns only its right operand, which silently dropped the year filter.
    if Biblio.select().where(
            (Biblio.year == year) & (Biblio.title == title)).exists():
        print("ATTENTION: there is a reference with same year and title. WE ADD ANYWAYS")
        print(str(year) + " " + full_author + " - " + title)
    elif Biblio.select().where(
            (Biblio.year == year)
            & (Biblio.full_author == full_author)).exists():
        print("ATTENTION: there is a reference with same year and authors. WE ADD ANYWAYS")
        print(str(year) + " " + full_author + " - " + title)
    return False
def extract_array_for_comparing():
    """Return ``[plain_text_reference, bibtex_id]`` for every Biblio record."""
    return [[record.plain_text_reference, record.bibtex_id]
            for record in Biblio.select()]
def decide_bibtex_id(first_try):
    """Return a bibtex id derived from ``first_try`` that is not yet in use.

    ``first_try`` itself is returned when no record carries it. Otherwise a
    lowercase letter is inserted after its first two characters, starting
    with 'b' and moving on through the alphabet until a free id is found.
    An ``IndexError`` is raised once the letters up to 'z' are all taken.
    """
    letters = list(string.ascii_lowercase)
    candidate = first_try
    next_letter = 1  # index 1 is 'b': 'a' is never tried
    while True:
        taken = Biblio.select().where(Biblio.bibtex_id == candidate)
        if not taken:
            return candidate
        candidate = first_try[:2] + letters[next_letter] + first_try[2:]
        next_letter += 1
def count_mixed_reproducibility(year):
    """Count the core detection papers of ``year`` by combined reproducibility.

    Only records whose ``main_objective`` contains "detection" and whose
    ``julio_state`` contains "integrated_core" are taken into account.

    Returns a three-element list with the number of records whose
    (``reproducible_method``, ``reproducible_experiments``) contain, in
    this order: ("yes", "yes"), ("yes", "no") and ("no", "no").
    """
    combinations = (("yes", "yes"), ("yes", "no"), ("no", "no"))
    counts = []
    for method_answer, experiments_answer in combinations:
        matching = Biblio.select().where(
            Biblio.reproducible_method.contains(method_answer)
            & Biblio.reproducible_experiments.contains(experiments_answer)
            & (Biblio.year == year)
            & Biblio.main_objective.contains("detection")
            & Biblio.julio_state.contains("integrated_core"))
        counts.append(matching.count())
    return counts
def extract_list_of_references(julio_state):
    """Store every single reference of the selected papers as a new entry.

    The records whose ``julio_state`` contains the given value are read;
    their ``raw_bibliography`` is split on ';' and each piece is passed to
    ``new_entry`` together with a running id (starting at 1) and the
    ``bibtex_id`` of the paper it comes from.

    Records whose ``raw_bibliography`` is NULL are skipped: calling
    ``split`` on ``None`` would otherwise abort the whole run.
    """
    selection = Biblio.select().where(Biblio.julio_state.contains(julio_state))
    counter = 1
    for element in selection:
        if element.raw_bibliography is None:
            continue
        for reference in element.raw_bibliography.split(';'):
            new_entry({
                "id": counter,
                "reference": reference,
                "coming_from": element.bibtex_id
            })
            counter += 1
def extract_number_of_papers_per_year(begin_year, end_year):
    """Count the core detection papers of each year in a range.

    For every year in ``range(begin_year, end_year)`` (``end_year``
    excluded) the records whose ``main_objective`` equals 'detection' and
    whose ``julio_state`` contains "integrated_core" are counted. Each
    year's count and the grand total are printed.

    Returns a dict mapping year to count.
    """
    papers_per_year = {}
    for year in range(begin_year, end_year):
        yearly_query = Biblio.select().where(
            (Biblio.year == year)
            & (Biblio.main_objective == 'detection')
            & (Biblio.julio_state.contains("integrated_core")))
        papers_per_year[year] = yearly_query.count()
        print(str(year) + " " + str(papers_per_year[year]))
    print(" ")
    print("TOTAL: " + str(sum(papers_per_year.values())))
    return papers_per_year
def _percentage(part, total):
    """Return ``part`` as a percentage of ``total``, rounded to one decimal.

    Returns 0.0 when ``total`` is zero instead of raising ZeroDivisionError.
    """
    if not total:
        return 0.0
    return round(float(part) * 100 / total, 1)


def count_for_all_years_v4(begin_year=2001, end_year=2019):
    """Print yearly percentages for four per-year counters.

    For every year in ``range(begin_year, end_year)`` (``end_year``
    excluded; the defaults reproduce the former hard-coded 2001-2018 span)
    the first value returned by each of the four ``count_*_in_year``
    helpers is expressed as a percentage of the records of that year whose
    ``main_objective`` contains "detection" and whose ``julio_state``
    contains "integrated_core".

    A per-year summary is printed, followed by the grand total and four
    ``{ (year,percentage) ... };`` series labelled Rep., Accm, Accd and
    Acck. A year without any paper yields 0.0 for all percentages.
    """
    total = 0
    output_stringRep = "{"
    output_stringAccm = "{"
    output_stringAccd = "{"
    output_stringAcck = "{"
    for year in range(begin_year, end_year):
        results_rep = count_reproducible_experiments_in_year(year)
        results_accm = count_reproducible_method_in_year(year)
        results_accd = count_accd_in_year(year)
        results_acck = count_acck_in_year(year)
        partial_total = Biblio.select().where(
            (Biblio.year == year)
            & Biblio.main_objective.contains("detection")
            & Biblio.julio_state.contains("integrated_core")).count()
        rep_percentage = _percentage(results_rep[0], partial_total)

        print("Year " + str(year))
        print("Reproducible Experiment: " + str(results_rep[0]) + "\t" +
              str(rep_percentage))
        total += partial_total
        print("Total: " + str(partial_total))
        print("")

        prefix = " (" + str(year) + ','
        output_stringRep += prefix + str(rep_percentage) + ")"
        output_stringAccm += prefix + str(
            _percentage(results_accm[0], partial_total)) + ")"
        output_stringAccd += prefix + str(
            _percentage(results_accd[0], partial_total)) + ")"
        output_stringAcck += prefix + str(
            _percentage(results_acck[0], partial_total)) + ")"
    print("***********")
    print("TOTAL: " + str(total))
    print("")
    print("Rep.:\t" + output_stringRep + " };")
    print("Accm:\t" + output_stringAccm + " };")
    print("Accd:\t" + output_stringAccd + " };")
    print("Acck:\t" + output_stringAcck + " };")
def extract_citation_number_for_all():
    """Refresh the Scholar data of every record in the database.

    All records are processed, even those that already have a citation
    count. For each of them the quoted title is sent through ``do_query``,
    the CSV answer is split with ``split_citation`` and fields 1, 3, 5 and
    6 of the result are stored as URL, citation count, cluster id and PDF
    URL respectively. After each record the iteration number is printed
    and the function waits a random 10-100 seconds, printing a countdown.
    """
    selection = Biblio.select(Biblio.title, Biblio.bibtex_id)
    querier.apply_settings(settings)
    query = SearchScholarQuery()
    # The former ``element = selection[0]`` was unused and raised IndexError
    # on an empty table; iterating directly handles the empty case.
    for centinela, element in enumerate(selection, 1):
        full_csv = do_query("\"" + element.title + "\"", query)
        print("*******************")
        print("TITLE: " + element.title)
        print(full_csv)
        print("*******************")
        array_of_citation = split_citation(full_csv)
        update_url(element.bibtex_id, array_of_citation[1])
        update_citations_google(element.bibtex_id, array_of_citation[3])
        update_clusterid_google(element.bibtex_id, array_of_citation[5])
        update_url_pdf(element.bibtex_id, array_of_citation[6])
        print("Iteration " + str(centinela))
        # Random pause between two queries, shown as a visible countdown.
        waiting_time = randint(10, 100)
        for i in range(waiting_time, 0, -1):
            sys.stdout.write(str(i) + ", ")
            sys.stdout.flush()
            sleep(1)
#Copyright 2018 Julio Navarro #Built at the University of Strasbourg (France). CSTB team @ ICube laboratory import peewee as pw import sys sys.path.insert(0,'../') sys.path.insert(0,'../database_queries/') from database_queries_biblio import * from models import Biblio selection = Biblio.select() def extract_plain_reference_through_references(): for element in selection: author = "NULL" title ="NULL" journal ="NULL" publisher ="NULL" volume ="NULL" issue ="NULL" pages ="NULL" if element.author: author = element.author if element.title: title = element.title if element.journal: journal = element.journal if element.publisher: publisher = element.publisher if element.volume:
def get_count_of_julio_state(value):
    """Return how many records have a ``julio_state`` containing ``value``."""
    state_matches = Biblio.julio_state.contains(value)
    matching_records = Biblio.select().where(state_matches)
    return matching_records.count()
#!/usr/bin/env python #Copyright 2018 Julio Navarro #Built at the University of Strasbourg (France). CSTB team @ ICube laboratory # -*- coding: utf-8 -*- import peewee as pw import sys sys.path.insert(0,'../') sys.path.insert(0,'../database_queries/') from database_queries_biblio import * from models import Biblio selection = Biblio.select()#.where(Biblio.julio_state != "Excluded") def clean_string(dirty_string): result = dirty_string.replace("&", "\\&") return result def building_bibtex_conference(element): result = "@inproceedings{"+element.bibtex_id+",\n" if not element.bibtex_full_author: print "Empty authors in "+element.bibtex_id else: result += "\tauthor={"+clean_string(element.bibtex_full_author)+"},\n" if not element.bibtex_title: print "Empty title in "+element.bibtex_id else: result += "\ttitle={"+clean_string(element.bibtex_title)+"},\n" if not element.journal:
def get_count_of_julio_state_and_main_objective(value_state, value_objective):
    """Count the records matching both a state and an objective.

    A record is counted when its ``julio_state`` contains ``value_state``
    and its ``main_objective`` contains ``value_objective``.
    """
    state_matches = Biblio.julio_state.contains(value_state)
    objective_matches = Biblio.main_objective.contains(value_objective)
    matching_records = Biblio.select().where(
        (state_matches) & (objective_matches))
    return matching_records.count()
import peewee as pw import sys import operator import re from representation_ranking import represent_ranking_in_table sys.path.insert(0,'../../') from models import Biblio selection = Biblio.select(Biblio.full_author, Biblio.bibtex_id).where((Biblio.main_objective == 'detection')&(Biblio.julio_state.contains("integrated"))) def extract_authors(): names = [] for reference in selection: names = names + [x.strip() for x in reference.full_author.split(' and ')] different_names_set = set(names) different_names = list(different_names_set) different_names.sort() counter_others = 0 dictionary_of_names = {} for element in different_names: total_number = names.count(element) dictionary_of_names[element] = total_number print element + '\t' + str(total_number)
#Copyright 2018 Julio Navarro #Built at the University of Strasbourg (France). CSTB team @ ICube laboratory import peewee as pw import sys sys.path.insert(0,'../') sys.path.insert(0,'../database_queries/') from models import Biblio from database_queries_authordetect import get_max_id,new_entry_safe selection = Biblio.select().where((Biblio.main_objective == 'detection')&(Biblio.julio_state.contains("integrated"))) def extract_authors(): author_dict = {} for reference in selection: array_of_authors = [x.strip() for x in reference.full_author.split(' and ')] citations = reference.citations_google bibtex_id = reference.bibtex_id for author in array_of_authors: #We need an array only with the rest of the authors other_authors = list(array_of_authors) other_authors.remove(author) if author not in author_dict: author_dict[author] = [[bibtex_id], 1, citations, citations, citations,
def get_count_of_main_objective(value):
    """Count the integrated records with a given main objective.

    A record is counted when its ``main_objective`` contains ``value`` and
    its ``julio_state`` contains "integrated".
    """
    objective_matches = Biblio.main_objective.contains(value)
    integrated = Biblio.julio_state.contains("integrated")
    matching_records = Biblio.select().where(
        (objective_matches) & (integrated))
    return matching_records.count()
import peewee as pw
import sys
import operator
import itertools
sys.path.insert(0,'../../')
from models import Biblio

# Dataset column of the records whose main objective contains 'detection',
# whose julio_state contains 'integrated' and whose type_experiment contains
# "public".
selection = Biblio.select(Biblio.dataset).where((Biblio.main_objective.contains('detection'))&(Biblio.julio_state.contains('integrated'))&(Biblio.type_experiment.contains("public")))

# Maps a dataset identifier to the label of the group it belongs to.
# Several identifiers share one label (e.g. all 'darpa_gcp*' keys).
# NOTE(review): 'NULL' presumably marks datasets to be left out -- confirm.
# NOTE(review): '2002_UCSB_treasure_hunt' is the only key with upper-case
# letters; verify it matches the value actually stored in the database.
equivalence_dict = {
    'private_dataset':'NULL',
    'darpa_1999':'NULL',
    'darpa_2000': 'DARPA 2000',
    'simulation': 'NULL',
    'nsa_interservice_academy_cyber_defense_competition': 'NSA ...',
    '2002_UCSB_treasure_hunt': 'UCSB',
    '2004_ucsb_treasure_hunt': 'UCSB',
    'darpa_gcp': 'DARPA GCP',
    'darpa_gcp_v3_1': 'DARPA GCP',
    'darpa_gcp_v3_2': 'DARPA GCP',
    'darpa_gcp_v2_0': 'DARPA GCP',
    'defcon_2010_18': 'DEFCON',
    '2008_ucsb_ictf': 'UCSB',
    'defcon_8': 'DEFCON',
    'defcon_9': 'DEFCON'
}

# The distinct labels of equivalence_dict other than 'NULL'.
list_of_elements = ['DARPA 2000','DARPA GCP','DEFCON','UCSB','NSA ...']