def new_entry(dict_entry):
    """Insert a new Biblio row from ``dict_entry`` and store its optional fields.

    The mandatory keys are ``id``, ``full_author``, ``year``, ``bibtex_id``,
    ``type`` and ``title``. They are checked in that order; at the first one
    that is missing, a message and the whole dictionary are printed and
    nothing is written.

    Once the row is created, each of the optional keys ``journal``,
    ``publisher``, ``volume``, ``issue``, ``pages``, ``isbn_issn`` and ``doi``
    that is present is stored through the matching ``update_*`` helper.

    Returns:
        True when the row was created, False when a mandatory key is missing.
    """
    required_keys = ("id", "full_author", "year", "bibtex_id", "type", "title")
    for key in required_keys:
        if key not in dict_entry:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("There is no " + key + "!!")
            print(dict_entry)
            return False
    # NOTE: "author" is neither required nor stored; only "full_author" is.

    # The identifier is passed through unidecode before being stored, and the
    # converted value is the key used by every update_* call below.
    current_bibtex_id = unidecode.unidecode(dict_entry["bibtex_id"])

    Biblio.create(id=dict_entry["id"],
                  full_author=dict_entry["full_author"].encode('utf-8'),
                  year=dict_entry["year"],
                  bibtex_id=current_bibtex_id,
                  type=dict_entry["type"],
                  title=dict_entry["title"].encode('utf-8'))

    if "journal" in dict_entry:
        update_journal(current_bibtex_id, dict_entry["journal"])
    if "publisher" in dict_entry:
        update_publisher(current_bibtex_id, dict_entry["publisher"])
    if "volume" in dict_entry:
        update_volume(current_bibtex_id, dict_entry["volume"])
    if "issue" in dict_entry:
        update_issue(current_bibtex_id, dict_entry["issue"])
    if "pages" in dict_entry:
        update_pages(current_bibtex_id, dict_entry["pages"])
    if "isbn_issn" in dict_entry:
        update_isbn_issn(current_bibtex_id, dict_entry["isbn_issn"])
    if "doi" in dict_entry:
        update_doi(current_bibtex_id, dict_entry["doi"])

    return True
예제 #2
0
def count_reproducible_method_in_year(year):
    """Count the core detection papers of ``year`` by method reproducibility.

    Only rows of the given year whose ``main_objective`` contains "detection"
    and whose ``julio_state`` contains "integrated_core" are considered.

    Returns:
        A two-element list: the number of such rows whose
        ``reproducible_method`` contains "yes", then the number whose
        ``reproducible_method`` contains "no".
    """
    def tally(answer):
        # Same filter for both answers; only the reproducible_method text differs.
        matching = Biblio.select().where(
            Biblio.reproducible_method.contains(answer) & (Biblio.year == year)
            & Biblio.main_objective.contains("detection")
            & Biblio.julio_state.contains("integrated_core"))
        return matching.count()

    yes_count = tally("yes")
    no_count = tally("no")
    return [yes_count, no_count]
예제 #3
0
def return_selection(objective, state):
    """Build a Biblio query filtered by objective and, optionally, by state.

    * ``objective`` falsy: every row is selected (``state`` is ignored).
    * ``objective`` given, ``state`` falsy: rows whose ``main_objective``
      contains ``objective``.
    * both given: rows that additionally have ``state`` inside ``julio_state``.

    Returns:
        The peewee select query (not yet iterated here).
    """
    if not objective:
        return Biblio.select()

    criteria = Biblio.main_objective.contains(objective)
    if state:
        criteria = criteria & Biblio.julio_state.contains(state)

    return Biblio.select().where(criteria)
def extract_citations_in_database(
        global_path="/Users/Julio/Documents/PhD/Papers/Security/Multi-Step attacks DB/"):
    """Fill ``raw_bibliography`` for every Biblio row where it is still NULL.

    For each such row the PDF named ``<bibtex_id>.pdf`` is looked up first in
    ``<global_path>Corpus/`` and then in ``<global_path>Papers/``. The first
    file found is handed to ``extract_citations_from_pdf`` and the returned
    text is stored. When no file exists, "NOT WORKED" is printed and stored.
    When the processing raises TypeError or IndexError, the row is marked with
    "SOME ERROR OF THE PROGRAM".

    Args:
        global_path: Directory prefix (with trailing separator) holding the
            ``Corpus/`` and ``Papers/`` folders. Defaults to the location that
            used to be hard-coded.
    """
    selection = Biblio.select(Biblio.bibtex_id).where(
        Biblio.raw_bibliography.is_null())

    for element in selection:
        try:
            string_with_references = "NOT WORKED"
            for folder in ("Corpus/", "Papers/"):
                string_of_path = global_path + folder + element.bibtex_id + ".pdf"
                if Path(string_of_path).is_file():
                    string_with_references = extract_citations_from_pdf(
                        string_of_path)
                    break
            else:
                # Neither folder contains the PDF.
                print("NOT WORKED")

            update_raw_bibliography(element.bibtex_id, string_with_references)
        # The original "except TypeError, IndexError:" caught only TypeError
        # and bound it to the name IndexError; a tuple catches both.
        except (TypeError, IndexError):
            print("Not worked for: " + element.bibtex_id)
            update_raw_bibliography(element.bibtex_id,
                                    "SOME ERROR OF THE PROGRAM")
def make_array_richer(array):
    """Print a paper count next to the second field of every entry of ``array``.

    For each element, ``element[0]`` is matched as a substring of
    ``Biblio.full_author`` among the rows whose ``main_objective`` contains
    'detection' and whose ``julio_state`` contains "integrated". The number of
    matching rows is printed, followed by " ==== " and ``element[1]``.

    Returns:
        An empty list (see the note before the return statement).
    """
    result = []
    for element in array:
        all_papers = Biblio.select().where(
            (Biblio.main_objective.contains('detection'))
            & (Biblio.full_author.contains(element[0]))
            & (Biblio.julio_state.contains("integrated"))).count()
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(str(all_papers) + " ==== " + str(element[1]))

    # NOTE(review): nothing is ever appended to result, so callers always get
    # []. Presumably the enriched entries were meant to be collected here;
    # confirm the intended shape before changing it.
    return result
예제 #6
0
def extract_number_of_papers_per_approach(approaches, begin_year, end_year):
    """Replace each approach's subtype list with its per-year paper counts.

    Args:
        approaches: List of mutable ``[label, subtypes]`` entries, where
            ``subtypes`` is the list of ``Biblio.approach`` values that belong
            to ``label``.
        begin_year: First year counted (inclusive).
        end_year: Year at which counting stops (exclusive).

    Returns:
        The very same ``approaches`` list. It is modified in place: index 1 of
        every entry now holds ``[[year, count], ...]``, where ``count`` is the
        number of rows of that year with one of the entry's subtypes as
        ``approach``, "integrated_core" inside ``julio_state`` and "detection"
        inside ``main_objective``.

    The sum of all counts is printed before returning.
    """
    paper_counter = 0
    for approach_entry in approaches:
        # Read the subtypes before they are overwritten at the end of the loop.
        subtypes = approach_entry[1]
        plot_for_type = []
        for year in range(begin_year, end_year):
            number_of_results = 0
            for subtype in subtypes:
                number_of_results += Biblio.select().where(
                    (Biblio.year == year) & (Biblio.approach == subtype)
                    & (Biblio.julio_state.contains("integrated_core"))
                    & (Biblio.main_objective.contains("detection"))).count()

            plot_for_type.append([year, number_of_results])
            paper_counter += number_of_results

        # Deliberate in-place update: callers get the same structure back.
        approach_entry[1] = plot_for_type

    print("Total number of papers extracted: " + str(paper_counter))
    return approaches
def extract_citation_number_for_the_non_found():
    """Query the scholar search for every row that has no citation count yet.

    Only rows whose ``citations_google`` is NULL are processed. For each one
    the quoted title is sent through ``do_query``, the raw answer is printed
    and split with ``split_citation``, and fields 1, 3, 5 and 6 of the split
    result are stored as url, citation count, cluster id and PDF url.

    After every row the function counts down a pause of ``randint(10, 100)``
    one-second sleeps on stdout before issuing the next query.
    """
    selection = Biblio.select(Biblio.title, Biblio.bibtex_id).where(
        Biblio.citations_google.is_null())

    querier.apply_settings(settings)

    query = SearchScholarQuery()

    # The original read selection[0] here without using it, which raised
    # IndexError when there was nothing left to process.
    for element in selection:

        full_csv = do_query("\"" + element.title + "\"", query)
        print("*******************")
        print("TITLE: " + element.title)
        print(full_csv)
        print("*******************")

        array_of_citation = split_citation(full_csv)

        update_url(element.bibtex_id, array_of_citation[1])
        update_citations_google(element.bibtex_id, array_of_citation[3])
        update_clusterid_google(element.bibtex_id, array_of_citation[5])
        update_url_pdf(element.bibtex_id, array_of_citation[6])

        waiting_time = randint(10, 100)

        # Visible countdown, one tick per second.
        for i in range(waiting_time, 0, -1):
            sys.stdout.write(str(i) + ", ")
            sys.stdout.flush()
            sleep(1)
def do_exist(year, full_author, title):
    """Tell whether a reference with this year, authors and title is stored.

    Returns True (after printing a message) only when a row matches all three
    values exactly. A row that shares just year and title, or just year and
    authors, only triggers a printed warning; the function still returns False
    so that the caller adds the new reference.

    Args:
        year: Value compared with ``Biblio.year``.
        full_author: String compared with ``Biblio.full_author``.
        title: String compared with ``Biblio.title``.
    """
    same_year = Biblio.year == year
    same_title = Biblio.title == title
    same_author = Biblio.full_author == full_author

    if Biblio.select().where(same_year & same_title & same_author).exists():
        print("The element already exists: " + str(
            year) + " " + full_author + " - " + title)
        return True

    # Conditions must be combined with "&": the Python "and" used previously
    # evaluates to its right-hand operand, which silently dropped the year
    # filter from both partial-match queries.
    if Biblio.select().where(same_year & same_title).exists():
        print("ATTENTION: there is a reference with same year and title. WE ADD ANYWAYS")
        print(str(year) + " " + full_author + " - " + title)
    elif Biblio.select().where(same_year & same_author).exists():
        print("ATTENTION: there is a reference with same year and authors. WE ADD ANYWAYS")
        print(str(year) + " " + full_author + " - " + title)

    return False
def decide_bibtex_id(first_try):
    """Return a bibtex id, derived from ``first_try``, that is not stored yet.

    ``first_try`` itself is returned when no row uses it. Otherwise a
    lowercase letter is inserted after its first two characters, trying 'b',
    'c', ... in turn until the candidate is not used by any row.

    Raises:
        IndexError: when every letter up to 'z' is already taken.
    """
    head, tail = first_try[:2], first_try[2:]
    candidate = first_try
    letter_index = 1  # index 1 is 'b': the first letter tried
    while Biblio.select().where(Biblio.bibtex_id == candidate):
        candidate = head + string.ascii_lowercase[letter_index] + tail
        letter_index += 1
    return candidate
예제 #10
0
def extract_array_for_comparing():
    """Return one ``[plain_text_reference, bibtex_id]`` pair per Biblio row."""
    return [[row.plain_text_reference, row.bibtex_id]
            for row in Biblio.select()]
예제 #11
0
def count_mixed_reproducibility(year):
    """Count core detection papers of ``year`` by method/experiment answers.

    Only rows of the given year whose ``main_objective`` contains "detection"
    and whose ``julio_state`` contains "integrated_core" are considered.

    Returns:
        A three-element list with the number of rows whose
        (``reproducible_method``, ``reproducible_experiments``) contain, in
        this order: ("yes", "yes"), ("yes", "no") and ("no", "no").
    """
    def tally(method_answer, experiments_answer):
        return Biblio.select().where(
            Biblio.reproducible_method.contains(method_answer)
            & Biblio.reproducible_experiments.contains(experiments_answer)
            & (Biblio.year == year)
            & Biblio.main_objective.contains("detection")
            & Biblio.julio_state.contains("integrated_core")).count()

    # The ("no", "yes") combination is not part of the result.
    answer_pairs = (("yes", "yes"), ("yes", "no"), ("no", "no"))
    return [tally(method, experiments) for method, experiments in answer_pairs]
def extract_list_of_references(julio_state):
    """Create one entry per citation found in the matching rows' bibliographies.

    Every Biblio row whose ``julio_state`` contains the given text is read.
    Its ``raw_bibliography`` is split on ';' and each piece is passed to
    ``new_entry`` as ``reference``, together with a running ``id`` (starting
    at 1 and shared across all rows) and the row's ``bibtex_id`` as
    ``coming_from``. Rows without a raw bibliography are skipped.
    """
    # NOTE(review): new_entry is called with the keys id / reference /
    # coming_from; presumably a new_entry for a references table is in scope
    # here rather than the one that creates Biblio rows -- verify.
    selection = Biblio.select().where(Biblio.julio_state.contains(julio_state))
    counter = 1

    for element in selection:
        # raw_bibliography is NULL until the citations have been extracted;
        # calling .split on None would abort the whole run half-way.
        if element.raw_bibliography is None:
            continue
        for reference in element.raw_bibliography.split(';'):
            new_entry({
                "id": counter,
                "reference": reference,
                "coming_from": element.bibtex_id
            })
            counter += 1
def extract_number_of_papers_per_year(begin_year, end_year):
    """Count the core detection papers of every year in a range.

    A row is counted when its ``main_objective`` equals 'detection' and its
    ``julio_state`` contains "integrated_core". Each year's count is printed
    as it is computed, followed by the grand total.

    Args:
        begin_year: First year counted (inclusive).
        end_year: Year at which counting stops (exclusive).

    Returns:
        A dict mapping each year of the range to its number of papers.
    """
    result = {}
    total = 0

    for year in range(begin_year, end_year):
        number_of_papers = Biblio.select().where(
            (Biblio.year == year) & (Biblio.main_objective == 'detection')
            & (Biblio.julio_state.contains("integrated_core"))).count()
        result[year] = number_of_papers
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(str(year) + "  " + str(number_of_papers))
        total += number_of_papers

    print(" ")
    print("TOTAL: " + str(total))
    return result
예제 #14
0
def _percentage(part, whole):
    """Return ``part`` as a percentage of ``whole``, rounded to one decimal.

    A ``whole`` of 0 yields 0.0 instead of raising ZeroDivisionError.
    """
    if not whole:
        return 0.0
    return round(float(part) * 100 / whole, 1)


def count_for_all_years_v4(begin_year=2001, end_year=2019):
    """Print, per year, four percentage series over the core detection papers.

    For every year in ``range(begin_year, end_year)`` the first element of the
    results of ``count_reproducible_experiments_in_year``,
    ``count_reproducible_method_in_year``, ``count_accd_in_year`` and
    ``count_acck_in_year`` is expressed as a percentage of the year's number
    of rows whose ``main_objective`` contains "detection" and whose
    ``julio_state`` contains "integrated_core". A year without such rows
    contributes 0.0 to every series.

    The per-year figures for the first series are printed while iterating.
    At the end the grand total and the four series are printed as
    ``{ (year,percentage) ... };`` lines labelled Rep., Accm, Accd and Acck.

    Args:
        begin_year: First year reported (inclusive). Defaults to 2001.
        end_year: Year at which the report stops (exclusive). Defaults to 2019.
    """
    total = 0
    # One list of " (year,percentage)" fragments per series, in output order.
    series = [("Rep.", []), ("Accm", []), ("Accd", []), ("Acck", [])]

    for year in range(begin_year, end_year):
        # Same order as the series labels above.
        yearly_counts = [
            count_reproducible_experiments_in_year(year),
            count_reproducible_method_in_year(year),
            count_accd_in_year(year),
            count_acck_in_year(year),
        ]

        partial_total = Biblio.select().where(
            (Biblio.year == year) & Biblio.main_objective.contains("detection")
            & Biblio.julio_state.contains("integrated_core")).count()

        results_rep = yearly_counts[0]
        print("Year " + str(year))
        print("Reproducible Experiment: " + str(results_rep[0]) + "\t" + str(
            _percentage(results_rep[0], partial_total)))

        total += partial_total
        print("Total: " + str(partial_total))
        print("")

        for (_label, fragments), counts in zip(series, yearly_counts):
            fragments.append(" (" + str(year) + ',' + str(
                _percentage(counts[0], partial_total)) + ")")

    print("***********")
    print("TOTAL: " + str(total))
    print("")

    for label, fragments in series:
        print(label + ":\t" + "{" + "".join(fragments) + " };")
def extract_citation_number_for_all():
    """Refresh the scholar citation data of every Biblio row.

    All rows are processed, including those that already have a citation
    count. For each one the quoted title is sent through ``do_query``, the raw
    answer is printed and split with ``split_citation``, and fields 1, 3, 5
    and 6 of the split result are stored as url, citation count, cluster id
    and PDF url. The 1-based iteration number is printed after each update.

    After every row the function counts down a pause of ``randint(10, 100)``
    one-second sleeps on stdout before issuing the next query.
    """
    selection = Biblio.select(Biblio.title, Biblio.bibtex_id)

    querier.apply_settings(settings)

    query = SearchScholarQuery()

    # The original read selection[0] here without using it, which raised
    # IndexError on an empty table.
    for centinela, element in enumerate(selection, 1):

        full_csv = do_query("\"" + element.title + "\"", query)
        print("*******************")
        print("TITLE: " + element.title)
        print(full_csv)
        print("*******************")

        array_of_citation = split_citation(full_csv)

        update_url(element.bibtex_id, array_of_citation[1])
        update_citations_google(element.bibtex_id, array_of_citation[3])
        update_clusterid_google(element.bibtex_id, array_of_citation[5])
        update_url_pdf(element.bibtex_id, array_of_citation[6])

        print("Iteration " + str(centinela))

        waiting_time = randint(10, 100)

        # Visible countdown, one tick per second.
        for i in range(waiting_time, 0, -1):
            sys.stdout.write(str(i) + ", ")
            sys.stdout.flush()
            sleep(1)
def update_raw_bibliography(bibtex_id, new_raw_bibliography):
    """Store ``new_raw_bibliography`` on the rows whose bibtex_id matches."""
    matches_entry = Biblio.bibtex_id == bibtex_id
    Biblio.update(
        raw_bibliography=new_raw_bibliography).where(matches_entry).execute()
예제 #17
0
#Copyright 2018 Julio Navarro
#Built at the University of Strasbourg (France). CSTB team @ ICube laboratory
import peewee as pw
import sys

sys.path.insert(0,'../')
sys.path.insert(0,'../database_queries/')

from database_queries_biblio import *
from models import Biblio

# Unfiltered query over every Biblio row; iterated by
# extract_plain_reference_through_references below.
selection = Biblio.select()

def extract_plain_reference_through_references():
    for element in selection:
        author = "NULL"
        title ="NULL"
        journal ="NULL"
        publisher ="NULL"
        volume ="NULL"
        issue ="NULL"
        pages ="NULL"
        if element.author:
            author = element.author
        if element.title:
            title = element.title
        if element.journal:
            journal = element.journal
        if element.publisher:
            publisher = element.publisher
        if element.volume:
#!/usr/bin/env python
#Copyright 2018 Julio Navarro
#Built at the University of Strasbourg (France). CSTB team @ ICube laboratory
# -*- coding: utf-8 -*-

import peewee as pw
import sys

sys.path.insert(0,'../')
sys.path.insert(0,'../database_queries/')

from database_queries_biblio import *
from models import Biblio

# Unfiltered query over every Biblio row. The filter that would drop the rows
# whose julio_state is "Excluded" is currently disabled.
selection = Biblio.select()#.where(Biblio.julio_state != "Excluded")

def clean_string(dirty_string):
    """Return ``dirty_string`` with a backslash put in front of every '&'.

    Presumably this keeps ampersands literal in the generated BibTeX.
    """
    return dirty_string.replace("&", "\\&")

def building_bibtex_conference(element):
    result = "@inproceedings{"+element.bibtex_id+",\n"
    if not element.bibtex_full_author:
        print "Empty authors in "+element.bibtex_id
    else:
        result += "\tauthor={"+clean_string(element.bibtex_full_author)+"},\n"
    if not element.bibtex_title:
        print "Empty title in "+element.bibtex_id
    else:
        result += "\ttitle={"+clean_string(element.bibtex_title)+"},\n"
    if not element.journal:
def update_pages(bibtex_id, new_pages):
    """Store ``new_pages`` in the ``pages`` field of the matching rows."""
    matches_entry = Biblio.bibtex_id == bibtex_id
    Biblio.update(pages=new_pages).where(matches_entry).execute()
예제 #20
0
import peewee as pw
import sys
import operator
import re
from representation_ranking import represent_ranking_in_table

sys.path.insert(0,'../../')

from models import Biblio

# full_author and bibtex_id of the rows whose main_objective equals 'detection'
# and whose julio_state contains "integrated"; iterated by extract_authors.
selection = Biblio.select(Biblio.full_author, Biblio.bibtex_id).where((Biblio.main_objective == 'detection')&(Biblio.julio_state.contains("integrated")))

def extract_authors():
    """Print every distinct author of ``selection`` with a paper count.

    The ``full_author`` field of each row in the module-level ``selection`` is
    split on ' and ' and the pieces are stripped. One line per distinct name
    is printed in sorted order: the name, a tab, and how many times that name
    appears across all rows.
    """
    # Tally in a single pass; the previous version rebuilt the list of names
    # for every row and rescanned it once per distinct name.
    dictionary_of_names = {}
    for reference in selection:
        for raw_name in reference.full_author.split(' and '):
            name = raw_name.strip()
            dictionary_of_names[name] = dictionary_of_names.get(name, 0) + 1

    for element in sorted(dictionary_of_names):
        print(element + '\t' + str(dictionary_of_names[element]))
def update_isbn_issn(bibtex_id, new_isbn_issn):
    """Store ``new_isbn_issn`` in the ``isbn_issn`` field of the matching rows."""
    matches_entry = Biblio.bibtex_id == bibtex_id
    Biblio.update(isbn_issn=new_isbn_issn).where(matches_entry).execute()
def update_doi(bibtex_id, new_doi):
    """Store ``new_doi`` in the ``doi`` field of the matching rows."""
    matches_entry = Biblio.bibtex_id == bibtex_id
    Biblio.update(doi=new_doi).where(matches_entry).execute()
예제 #23
0
#Copyright 2018 Julio Navarro
#Built at the University of Strasbourg (France). CSTB team @ ICube laboratory
import peewee as pw
import sys

sys.path.insert(0,'../')
sys.path.insert(0,'../database_queries/')

from models import Biblio
from database_queries_authordetect import get_max_id,new_entry_safe

# Rows whose main_objective equals 'detection' and whose julio_state contains
# "integrated"; iterated by extract_authors below.
selection = Biblio.select().where((Biblio.main_objective == 'detection')&(Biblio.julio_state.contains("integrated")))

def extract_authors():
    author_dict = {}
    for reference in selection:
        array_of_authors = [x.strip() for x in reference.full_author.split(' and ')]

        citations = reference.citations_google
        bibtex_id = reference.bibtex_id

        for author in array_of_authors:
            #We need an array only with the rest of the authors
            other_authors = list(array_of_authors)
            other_authors.remove(author)
            if author not in author_dict:
                author_dict[author] = [[bibtex_id],
                                        1,
                                        citations,
                                        citations,
                                        citations,
def update_journal(bibtex_id, new_journal):
    """Store ``new_journal``, UTF-8 encoded, in ``journal`` of the matching rows."""
    encoded_journal = new_journal.encode('utf-8')
    matches_entry = Biblio.bibtex_id == bibtex_id
    Biblio.update(journal=encoded_journal).where(matches_entry).execute()
예제 #25
0
import peewee as pw
import sys
import operator
import itertools

sys.path.insert(0,'../../')

from models import Biblio

# dataset field of the rows whose main_objective contains 'detection', whose
# julio_state contains 'integrated' and whose type_experiment contains "public".
selection = Biblio.select(Biblio.dataset).where((Biblio.main_objective.contains('detection'))&(Biblio.julio_state.contains('integrated'))&(Biblio.type_experiment.contains("public")))

# Maps a raw dataset identifier to the label of the family it belongs to.
# Several identifiers share one label (e.g. all darpa_gcp* -> 'DARPA GCP').
# NOTE(review): 'NULL' presumably marks datasets to leave out -- confirm
# against the code that reads this mapping.
# NOTE(review): '2002_UCSB_treasure_hunt' is the only key with upper-case
# letters; verify it matches the value stored in the database.
equivalence_dict = {
'private_dataset':'NULL',
'darpa_1999':'NULL',
'darpa_2000': 'DARPA 2000',
'simulation': 'NULL',
'nsa_interservice_academy_cyber_defense_competition': 'NSA ...',
'2002_UCSB_treasure_hunt': 'UCSB',
'2004_ucsb_treasure_hunt': 'UCSB',
'darpa_gcp': 'DARPA GCP',
'darpa_gcp_v3_1': 'DARPA GCP',
'darpa_gcp_v3_2': 'DARPA GCP',
'darpa_gcp_v2_0': 'DARPA GCP',
'defcon_2010_18': 'DEFCON',
'2008_ucsb_ictf': 'UCSB',
'defcon_8': 'DEFCON',
'defcon_9': 'DEFCON'
}

# The distinct labels of equivalence_dict other than 'NULL'.
list_of_elements = ['DARPA 2000','DARPA GCP','DEFCON','UCSB','NSA ...']
def update_citations_google(bibtex_id, num_citations):
    """Store ``num_citations`` in ``citations_google`` of the matching rows."""
    matches_entry = Biblio.bibtex_id == bibtex_id
    Biblio.update(citations_google=num_citations).where(matches_entry).execute()
def update_url(bibtex_id, new_url):
    """Store ``new_url`` in the ``url`` field of the matching rows."""
    matches_entry = Biblio.bibtex_id == bibtex_id
    Biblio.update(url=new_url).where(matches_entry).execute()
def update_plain_text_reference(bibtex_id, new_plain_text_reference):
    """Store the new text in ``plain_text_reference`` of the matching rows."""
    matches_entry = Biblio.bibtex_id == bibtex_id
    Biblio.update(
        plain_text_reference=new_plain_text_reference
    ).where(matches_entry).execute()
def update_url_pdf(bibtex_id, new_url_pdf):
    """Store ``new_url_pdf`` in the ``url_pdf`` field of the matching rows."""
    matches_entry = Biblio.bibtex_id == bibtex_id
    Biblio.update(url_pdf=new_url_pdf).where(matches_entry).execute()
def update_clusterid_google(bibtex_id, new_clusterid_google):
    """Store the new value in ``clusterid_google`` of the matching rows."""
    matches_entry = Biblio.bibtex_id == bibtex_id
    Biblio.update(
        clusterid_google=new_clusterid_google).where(matches_entry).execute()