Example #1
def main():

    try:
        links_afisha = WebSource(config.URL_AFISHA_VLG).get_links_afisha()

        afisha_movies = [
            WebSource(link).get_info_about_movie() for link in links_afisha
        ]

        Storer('database.txt').save('afisha', afisha_movies)

        links_temp = WebSource(url_premieres()).get_links_premieres()

        # Keep only premieres that are not already in the afisha listing.
        links_premieres = [
            link for link in links_temp if link not in links_afisha
        ]

        premieres_movies = [
            WebSource(link).get_info_about_movie() for link in links_premieres
        ]

        Storer('database.txt').save('premieres', premieres_movies)
    except Exception as err:
        logging.error(err)
        time.sleep(5)
        print("Error!")
Example #2
 def __init__(self,
              tp_url_real,
              context_path,
              context_file_path,
              base_iri,
              base_dir,
              info_dir,
              dataset_home,
              tmp_dir,
              triplestore_url=None):
     self.tp_url = triplestore_url
     self.base_iri = base_iri
     self.base_dir = base_dir
     self.info_dir = info_dir
     self.context_path = context_path
     self.dataset_home = URIRef(dataset_home)
     self.tmp_dir = tmp_dir
     self.tp_res = URIRef(tp_url_real)
     self.repok = Reporter(prefix="[DatasetHandler: INFO] ")
     self.reperr = Reporter(prefix="[DatasetHandler: ERROR] ")
     self.st = Storer(context_map={context_path: context_file_path},
                      repok=self.repok,
                      reperr=self.reperr)
     self.st.set_preface_query(
         u"DELETE { ?res <%s> ?date } WHERE { ?res a <%s> ; <%s> ?date }" %
         (str(DatasetHandler.modified), str(DatasetHandler.dataset),
          str(DatasetHandler.modified)))
Example #3
class Crawler(object):
    def __init__(self):
        self.storer = Storer(db="Quora", collection="user")
        self.parser = Parser("http://www.quora.com/profile/")

    def crawl(self, user_id, depth):
        for item in self.parser.parse_user(user_id, depth):
            self.storer.save(item)
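
For context, this crawler's storage side is MongoDB-backed (Storer(db="Quora", collection="user")). A minimal sketch of what such a class could look like with pymongo; the save() upsert and the connection defaults are assumptions, not the original implementation:

from pymongo import MongoClient

class Storer:
    """Hypothetical minimal MongoDB-backed store for crawled items."""
    def __init__(self, db, collection, host="localhost", port=27017):
        self.collection = MongoClient(host, port)[db][collection]

    def save(self, item):
        # Upsert on the item's id so re-crawled users are not duplicated.
        self.collection.replace_one({"_id": item["_id"]}, item, upsert=True)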
Example #4
 def setUp(self):
     self.file_name = "testeo"
     self.extension_txt = "txt"
     self.extension_csv = "csv"
     self.matrix = (['9','3','8','4','7','5','1','6','2'],
                    ['1','4','6','9','3','2','7','5','8'],
                    ['7','5','2','8','6','1','9','4','3'],
                    ['8','9','1','6','4','3','2','7','5'],
                    ['3','6','5','2','9','7','8','1','4'],
                    ['4','2','7','1','5','8','6','3','9'],
                    ['5','8','3','7','2','6','4','9','1'],
                    ['2','7','9','3','1','4','5','8','6'],
                    ['6','1','4','5','8','9','3','2','7'])
     self.store01 = Storer(self.matrix, self.file_name, self.extension_txt)
     self.store02 = Storer(self.matrix, self.file_name, self.extension_csv)
Example #5
 def __init__(self, input_dir, output_dir=None, tmp_dir=None):
     self.input_dir = input_dir
     self.output_dir = output_dir
     self.tmp_dir = tmp_dir
     self.storer = Storer()
     self.name = self.__class__.__name__
     self.repok = Reporter(prefix="[%s - INFO] " % self.name)
     self.repok.new_article()
     self.reper = Reporter(prefix="[%s - ERROR] " % self.name)
     self.reper.new_article()
Example #6
def handle_start(message):

    if not Storer('userdata.txt').check_user(str(message.chat.id)):
        msg = bot.send_message(
            message.chat.id,
            'Выбирайте',  # "Choose"
            reply_markup=markups.choice_buttons1)
        bot.register_next_step_handler(msg, ask_run_or_template)
    else:
        Storer('userdata.txt').save_user(str(message.chat.id))
        # 'Введите название шаблона' = "Enter the template name"
        msg = bot.send_message(message.chat.id, 'Введите название шаблона')
        bot.register_next_step_handler(msg, ask_template)
Example #7
 def test_initialization_pkl(self):
     """
     Test initialization procedure
     Expected one backup file after dump procedure
     """
     s = Storer(dump_path=PATH_DUMPS, dump_name=DUMP_NAME, verbose=VERBOSE, compressed=False, _test=True)
     s.put(1, name="one")
     s.dump()
     s2 = Storer(dump_path=PATH_DUMPS, dump_name=DUMP_NAME, verbose=VERBOSE, compressed=False, _test=True)
     one = len(s2.backup_list)
     assert one == 1, f"Got something different: {one}"
     s._cleanup()
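
The dump-oriented tests in this and the following examples exercise put()/get(), dump(), backup() and a backup_list. A minimal sketch of that interface, pickle-based and ignoring compression, separations, backups and _cleanup(); all of this is an assumption about the real class:

import os
import pickle

class Storer:
    """Hypothetical minimal dump-oriented store: an in-memory dict that
    put()/get() operate on, with dump() pickling it to disk."""
    def __init__(self, dump_path=".", dump_name="dump", **kwargs):
        self.path = os.path.join(os.path.expanduser(dump_path),
                                 dump_name + ".pkl")
        self.data = {}
        if os.path.exists(self.path):
            with open(self.path, "rb") as f:
                self.data = pickle.load(f)

    def put(self, what, name):
        self.data[name] = what

    def get(self, name):
        return self.data.get(name)

    def dump(self):
        with open(self.path, "wb") as f:
            pickle.dump(self.data, f)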
Example #8
class Checker(object):
    def __init__(self, input_dir, output_dir=None, tmp_dir=None):
        self.input_dir = input_dir
        self.output_dir = output_dir
        self.tmp_dir = tmp_dir
        self.storer = Storer()
        self.name = self.__class__.__name__
        self.repok = Reporter(prefix="[%s - INFO] " % self.name)
        self.repok.new_article()
        self.reper = Reporter(prefix="[%s - ERROR] " % self.name)
        self.reper.new_article()

    def process(self):
        for cur_dir, cur_subdir, cur_files in os.walk(self.input_dir):
            for cur_file in cur_files:
                self.repok.new_article()
                self.reper.new_article()
                cur_rdf_path = cur_dir + os.sep + cur_file
                try:
                    self.repok.add_sentence("Processing '%s'" % cur_rdf_path)
                    g = self.storer.load(cur_rdf_path, tmp_dir=self.tmp_dir)
                    if self.output_dir is None:
                        self.repok.add_sentence("The RDF graph has been converted in TRIG as follows:\n%s"
                                                % g.serialize(format="trig"))
                    else:
                        if not os.path.exists(self.output_dir):
                            os.makedirs(self.output_dir)
                        output_file = self.output_dir + os.sep + "converted_" + cur_file + ".trig"
                        # Serialize first: the original passed a two-element
                        # tuple to a single-%s format string (a TypeError).
                        g.serialize(output_file, format="trig")
                        self.repok.add_sentence("The RDF graph has been stored in %s"
                                                % output_file)
                except Exception:
                    self.reper.add_sentence("The file '%s' doesn't contain RDF statements"
                                            % cur_rdf_path, False)
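
A hypothetical invocation of the class above (the directory names are placeholders): convert every RDF file found under 'in/' and write the TriG output to 'out/':

checker = Checker("in", output_dir="out", tmp_dir="tmp")
checker.process()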
Example #9
 def test_creating_pbz2(self):
     """
     Test paths of the created instance (PBZ2)
     """
     s = Storer(dump_path=PATH_DUMPS, dump_name=DUMP_NAME, verbose=VERBOSE, _test=True)
     s.put(1, name="one")
     s.dump()
     assert os.path.exists(os.path.expanduser(PATH_DUMPS) + DUMP_NAME + PBZ2)
     s._cleanup()
Example #10
 def test_backup_dump_pbz2(self):
     """
     Test backup creation
     """
     s = Storer(dump_path=PATH_DUMPS, dump_name=DUMP_NAME, verbose=VERBOSE, _test=True)
     s.put(1, name="one")
     s.backup()
     assert os.path.exists(os.path.expanduser(PATH_DUMPS_BACKUP) + DUMP_NAME + PBZ2)
     s._cleanup()
Example #11
def add_genres(message):

    genre = message.text
    # 'template' is the module-level name set in ask_template()
    Storer('userdata.txt').add_genre(str(message.chat.id), template, genre)
    msg = bot.send_message(
        message.chat.id,
        'Выбирайте',  # "Choose"
        reply_markup=markups.choice_buttons2)
    bot.register_next_step_handler(msg, add_genre_or_finish)
Example #12
def ask_template(message):

    global template
    template = message.text
    Storer('userdata.txt').add_template(str(message.chat.id), template)
    msg = bot.send_message(
        message.chat.id,
        'Добавьте жанры в шаблон',  # "Add genres to the template"
        reply_markup=markups.choice_genres)
    bot.register_next_step_handler(msg, add_genres)
Example #13
def findMovies(userid):

    usersets = Storer('userdata.txt').get_usersets(str(userid))
    afisha = Storer('database.txt').get_data('afisha')
    afisha_movies = get_info(usersets, afisha)
    if afisha_movies:
        for movie in afisha_movies:
            bot.send_message(userid, movie)
    else:
        # "No suitable films in the current listings"
        bot.send_message(userid, "Из текущего репертуара подходящих фильмов нет")
    # "Now checking this month's premieres:"
    bot.send_message(userid, "Обзор среди премьер текущего месяца: ")
    time.sleep(1)
    premieres = Storer('database.txt').get_data('premieres')
    premieres_movies = get_info(usersets, premieres)
    if premieres_movies:  # the original checked 'premieres', the unfiltered list
        for movie in premieres_movies:
            bot.send_message(userid, movie)
    else:
        # "Alas, no suitable films among the premieres either"
        bot.send_message(userid, "Увы, и среди премьер подходящих фильмов нет")
    bot.send_message(userid, 'Готово', reply_markup=markups.start_markup)  # "Done"
Example #14
 def __init__(self,
              g_set=None,
              ts_url=None,
              base_dir=None,
              base_iri=None,
              tmp_dir=None,
              context_map={}):
     self.g = Graph()
     self.base_dir = base_dir
     self.base_iri = base_iri
     self.storer = Storer(context_map=context_map)
     self.tmp_dir = tmp_dir
     self.name = "SPACIN " + self.__class__.__name__
     self.loaded = set()
     if g_set is not None:
         self.update_graph_set(g_set)
     if ts_url is None:
         self.ts = None
     else:
         self.ts = ConjunctiveGraph('SPARQLUpdateStore')
         self.ts.open((ts_url, ts_url))
Example #15
class TestStorerClassAndMethods(unittest.TestCase):
    def setUp(self):
        self.file_name = "testeo"
        self.extension_txt = "txt"
        self.extension_csv = "csv"
        self.matrix = (['9','3','8','4','7','5','1','6','2'],
                       ['1','4','6','9','3','2','7','5','8'],
                       ['7','5','2','8','6','1','9','4','3'],
                       ['8','9','1','6','4','3','2','7','5'],
                       ['3','6','5','2','9','7','8','1','4'],
                       ['4','2','7','1','5','8','6','3','9'],
                       ['5','8','3','7','2','6','4','9','1'],
                       ['2','7','9','3','1','4','5','8','6'],
                       ['6','1','4','5','8','9','3','2','7'])
        self.store01 = Storer(self.matrix, self.file_name, self.extension_txt)
        self.store02 = Storer(self.matrix, self.file_name, self.extension_csv)

    def test_storer_receives_right_number_of_elements_from_1_to_9_to_be_stored(
            self):
        num_elements = self.store01.verify_right_matrix()
        self.assertEqual(81, num_elements)

    def test_a_sudoku_solution_is_stored_in_txt_format(self):
        self.store01.save_matrix_to_file()
        self.assertTrue(
            self.file_exist(self.store01.sudoku_file,
                            self.store01.sudoku_extension))

    def test_a_sudoku_solution_is_stored_in_csv_format(self):
        self.store02.save_matrix_to_file()
        self.assertTrue(
            self.file_exist(self.store02.sudoku_file,
                            self.store02.sudoku_extension))

    def test_if_the_file_already_exists_an_alert_is_displayed(self):
        error = self.store02.save_matrix_to_file()
        self.assertEqual("File already exist", error)

    def file_exist(self, f_name, extension_file):
        """
        Helper method that verifies the new file was created properly.
        """
        try:
            with open(f_name + "." + extension_file, 'r') as f:
                f.read()
            return True
        except IOError:
            # The original returned the IOError class itself, which is truthy,
            # so assertTrue() could never fail; return False instead.
            return False
Example #16
    arg_parser = argparse.ArgumentParser(
        "find_by_identifier.py",
        description="This script allows one to identify the files in a given directory "
                    "containing RDF documents that seem to be identical according to the "
                    "identifier of the entity they describe.")
    arg_parser.add_argument("-i", "--input_dir", dest="i_dir", required=True,
                            help="The directory where to look for duplicates.")
    arg_parser.add_argument("-o", "--output_file", dest="o_file",
                            help="The file where to write the results.")
    arg_parser.add_argument("--recursive", dest="rec", default=False, action="store_true",
                            help="The process will consider also the subdir recursively.")
    args = arg_parser.parse_args()

    id_doc = {}

    s = Storer(context_map={context_path: context_file_path})

    all_files = []
    if args.rec:
        for cur_dir, cur_subdir, cur_files in os.walk(args.i_dir):
            for cur_file in cur_files:
                if cur_file.endswith(".json"):
                    all_files += [cur_dir + os.sep + cur_file]
    else:
        for cur_file in os.listdir(args.i_dir):
            if cur_file.endswith(".json"):
                all_files += [args.i_dir + os.sep + cur_file]

    for rdf_path in all_files:
        cur_g = s.load(rdf_path, tmp_dir=temp_dir_for_rdf_loading)
        try:
Example #17
#!/usr/bin/env python3

from storer import Storer
import sys

s = Storer()

if s.get_value() != 0:
    print('Initial value incorrect.')
    sys.exit(1)

s.set_value(42)

if s.get_value() != 42:
    print('Setting value failed.')
    sys.exit(1)

try:
    s.set_value('not a number')
    print('Using wrong argument type did not fail.')
    sys.exit(1)
except TypeError:
    pass
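
A minimal sketch of a Storer that satisfies the checks above (initial value 0, set_value() rejecting non-integers with TypeError); the real storer module may do more:

class Storer:
    def __init__(self):
        self._value = 0

    def get_value(self):
        return self._value

    def set_value(self, value):
        # Reject non-integer input, as the smoke test expects.
        if not isinstance(value, int):
            raise TypeError("value must be an integer")
        self._value = value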
Example #18
class DatasetHandler(object):
    DCTERMS = Namespace("http://purl.org/dc/terms/")
    DCAT = Namespace("http://www.w3.org/ns/dcat#")
    VOID = Namespace("http://rdfs.org/ns/void#")
    MTT = Namespace("https://w3id.org/spar/mediatype/text/")
    DBR = Namespace("http://dbpedia.org/resource/")

    dataset = DCAT.Dataset
    datafile = DCAT.Distribution

    title = DCTERMS.title
    description = DCTERMS.description
    issued = DCTERMS.issued
    modified = DCTERMS.modified
    keyword = DCAT.keyword
    subject = DCAT.theme
    landing_page = DCAT.landingPage
    subset = VOID.subset
    sparql_endpoint = VOID.sparqlEndpoint
    distribution = DCAT.distribution
    license = DCTERMS.license
    download_url = DCAT.downloadURL
    media_type = DCAT.mediaType
    byte_size = DCAT.byteSize  # the DCAT term is byteSize; byte_size is not in the vocabulary
    label = RDFS.label
    a = RDF.type
    turtle = MTT.turtle
    bibliographic_database = DBR.Bibliographic_database
    open_access = DBR.Open_access
    scholarly_communication = DBR.Scholarly_communication
    citations = DBR.Citation

    def __init__(self, tp_url_real, context_path, context_file_path,
                 base_iri, base_dir, info_dir, dataset_home, tmp_dir, triplestore_url=None):
        self.tp_url = triplestore_url
        self.base_iri = base_iri
        self.base_dir = base_dir
        self.info_dir = info_dir
        self.context_path = context_path
        self.dataset_home = URIRef(dataset_home)
        self.tmp_dir = tmp_dir
        self.tp_res = URIRef(tp_url_real)
        self.repok = Reporter(prefix="[DatasetHandler: INFO] ")
        self.reperr = Reporter(prefix="[DatasetHandler: ERROR] ")
        self.st = Storer(context_map={context_path: context_file_path},
                         repok=self.repok, reperr=self.reperr)
        self.st.set_preface_query(
            u"DELETE { ?res <%s> ?date } WHERE { ?res a <%s> ; <%s> ?date }" %
            (str(DatasetHandler.modified), str(DatasetHandler.dataset), str(DatasetHandler.modified)))

    # /START Create Literal
    def create_label(self, g, res, string):
        return create_literal(g, res, RDFS.label, string)

    def create_publication_date(self, g, res, string):
        return create_literal(g, res, self.issued, string, XSD.dateTime)

    def update_modification_date(self, g, res, string):
        g.remove((res, self.modified, None))
        return create_literal(g, res, self.modified, string, XSD.dateTime)

    def create_title(self, g, res, string):
        return create_literal(g, res, self.title, string)

    def create_description(self, g, res, string):
        return create_literal(g, res, self.description, string)

    def create_keyword(self, g, res, string):
        return create_literal(g, res, self.keyword, string)

    def create_byte_size(self, g, res, string):
        return create_literal(g, res, self.byte_size, string, XSD.decimal)
    # /END Create Literal

    # /START Create Complex Attributes
    def has_subject(self, g, res, obj):
        g.add((res, self.subject, obj))

    def has_landing_page(self, g, res, obj):
        g.add((res, self.landing_page, obj))

    def has_subset(self, g, res, obj):
        g.add((res, self.subset, obj))

    def has_sparql_endpoint(self, g, res, obj):
        g.add((res, self.sparql_endpoint, obj))

    def has_distribution(self, g, res, obj):
        g.add((res, self.distribution, obj))

    def has_license(self, g, res, obj):
        g.add((res, self.license, obj))

    def has_download_url(self, g, res, obj):
        g.add((res, self.download_url, obj))

    def has_media_type(self, g, res, obj):
        g.add((res, self.media_type, obj))
    # /END Create Complex Attributes

    # /START Types
    def dataset_type(self, g, res):
        create_type(g, res, self.dataset)

    def distribution_type(self, g, res):
        create_type(g, res, self.datafile)
    # /END Types

    def update_dataset_info(self, graph_set):
        cur_time = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
        subgraphs_to_update = set()
        all_graphs = []

        for g in graph_set.graphs():
            cur_id = g.identifier
            if cur_id not in subgraphs_to_update:
                subgraphs_to_update.add(cur_id)
                cur_dataset_res = URIRef(cur_id)
                cur_dataset = self.get_dataset_graph(cur_dataset_res, cur_time)
                self.update_modification_date(cur_dataset, cur_dataset_res, cur_time)
                all_graphs += [cur_dataset]

        if subgraphs_to_update:
            cur_occ_res = URIRef(self.base_iri)
            cur_occ = self.get_dataset_graph(cur_occ_res, cur_time)
            self.update_modification_date(cur_occ, cur_occ_res, cur_time)

            for subgraph_id in subgraphs_to_update:
                self.has_subset(cur_occ, cur_occ_res, URIRef(subgraph_id))
            all_graphs += [cur_occ]

        if all_graphs:  # Store everything and upload to triplestore
            if self.tp_url is None:
                self.st.store_all(
                    self.base_dir, self.base_iri, self.context_path,
                    self.tmp_dir, all_graphs, True)
            else:
                self.st.upload_and_store(
                    self.base_dir, self.tp_url, self.base_iri, self.context_path,
                    self.tmp_dir, all_graphs, True)

    def get_dataset_graph(self, res, cur_time):
        dataset_path = self.get_metadata_path_from_resource(res)
        if os.path.exists(dataset_path):
            return list(self.st.load(dataset_path, tmp_dir=self.tmp_dir).contexts())[0]
        else:
            dataset_label = "OCC"
            dataset_title = "The OpenCitations Corpus"
            dataset_description = "The OpenCitations Corpus is an open repository of scholarly " \
                                  "citation data made available under a Creative Commons public " \
                                  "domain dedication, which provides in RDF accurate citation " \
                                  "information (bibliographic references) harvested from the " \
                                  "scholarly literature (described using the SPAR Ontologies) " \
                                  "that others may freely build upon, enhance and reuse for any " \
                                  "purpose, without restriction under copyright or database law."
            if re.search("/../$", str(res)) is not None:
                g = Graph(identifier=str(res))
                dataset_short_name = str(res)[-3:-1]
                dataset_name = GraphSet.labels[dataset_short_name]
                dataset_title += ": %s dataset" % dataset_name.title()
                dataset_description += " This sub-dataset contains all the '%s' resources." % \
                                       dataset_name
                dataset_label += " / %s" % dataset_short_name
                self.create_keyword(g, res, dataset_name)
            else:
                g = Graph()
                self.has_landing_page(g, res, self.dataset_home)
                self.has_sparql_endpoint(g, res, self.tp_res)
            self.dataset_type(g, res)
            self.create_label(g, res, dataset_label)
            self.create_title(g, res, dataset_title)
            self.create_description(g, res, dataset_description)
            self.create_publication_date(g, res, cur_time)
            self.create_keyword(g, res, "OCC")
            self.create_keyword(g, res, "OpenCitations")
            self.create_keyword(g, res, "OpenCitations Corpus")
            self.create_keyword(g, res, "SPAR Ontologies")
            self.create_keyword(g, res, "bibliographic references")
            self.create_keyword(g, res, "citations")
            self.has_subject(g, res, self.bibliographic_database)
            self.has_subject(g, res, self.scholarly_communication)
            self.has_subject(g, res, self.open_access)
            self.has_subject(g, res, self.citations)

            return g

    def get_metadata_path_from_resource(self, dataset_res):
        return self.get_metadata_path_from_iri(str(dataset_res))

    def get_metadata_path_from_iri(self, dataset_iri):
        return re.sub("^%s" % self.base_iri, self.base_dir, dataset_iri) + "index.json"
Example #19
class VeuszEngine(BotEngine):
    internal_name: str = "[VeuszEngine]"
    #
    g: Embedded = None
    title: str = field(default="Notitle")
    pages_info: dict = field(default_factory=dict)
    _xy: Any = None  # flag for animation
    #
    showkey: bool = True
    keyBorderHide: bool = True
    keyFontSize: int = 14
    plot_line: bool = True
    #
    xname: str = "x"
    yname: str = "y"
    xlog: bool = False
    ylog: bool = False
    ymin: str = "Auto"
    ymax: str = "Auto"
    xmin: str = "Auto"
    xmax: str = "Auto"
    #
    transparency: int = 50

    def __post_init__(self):
        self.storer = Storer(exit_dump=False)

        self.g = veusz.Embedded(name=self.title, hidden=self.hidden)
        self.g.EnableToolbar()
        self.init_pages()

    def _init(self, page_name=""):
        # creating initial values for plotting per page.
        self.storer.put(what="xname", name=page_name + "/xname")
        self.storer.put(what="yname", name=page_name + "/yname")
        self.storer.put(what=False, name=page_name + "/xlog")
        self.storer.put(what=False, name=page_name + "/ylog")
        self.storer.put(what="Auto", name=page_name + "/xmin")
        self.storer.put(what="Auto", name=page_name + "/xmax")
        self.storer.put(what="Auto", name=page_name + "/ymin")
        self.storer.put(what="Auto", name=page_name + "/ymax")

    def init_pages(self):
        if self.pages_info:
            for page in self.pages_info:
                self._init(page_name=page)
                for prop in self.pages_info[page]:
                    self.storer.put(what=self.pages_info[page][prop],
                                    name=page + "/" + prop)
        else:
            self._init(page_name="page1")
            self.storer.put(what=self.xname, name="page1/xname")
            self.storer.put(what=self.yname, name="page1/yname")

            self.storer.put(what=self.xlog, name="page1/xlog")
            self.storer.put(what=self.ylog, name="page1/ylog")

            self.storer.put(what=self.xmin, name="page1/xmin")
            self.storer.put(what=self.xmax, name="page1/xmax")

            self.storer.put(what=self.ymax, name="page1/ymax")
            self.storer.put(what=self.ymin, name="page1/ymin")

    def get_page(self, name="page1"):

        try:
            self.page = self.g.Root[name]
            _num_lines = self.storer.get(name=name + "/_num_lines")
            __num_lines = self.storer.get(
                name=name +
                "/__num_lines")  # if save_previous_state is applied
        except KeyError:
            self.page = self.g.Root.Add("page")
            self.page.Rename(name)
            __num_lines = 1
            _num_lines = 1
            self.storer.put(what=_num_lines, name=name + "/_num_lines")
            self.storer.put(what=__num_lines, name=name + "/__num_lines")

        self.page.width.val = '15cm'
        self.page.height.val = '10cm'

        try:
            self.graph = self.g.Root[name + '/graph1']
        except Exception:  # the graph widget does not exist yet
            self.graph = self.page.Add('graph')

        try:
            # the key already exists
            self.key = self.g.Root[name + "/graph1/key1"]
        except Exception:
            if self.showkey:
                self.graph.Add('key')
                self.graph.key1.Border.hide.val = self.keyBorderHide
                self.graph.key1.Text.size.val = f"{self.keyFontSize}pt"

        return _num_lines, __num_lines

    def plot(
        self,
        x: List,
        y: List,
        key_name_f: str = "",
        key_name: str = "",
        marker_size: str = "2.5pt",
        plot_line: bool = True,
        color_num: Union[str, int] = "auto",
        marker_type: Union[str, int] = "auto",
        line_type: Union[str, int] = "auto",
        save_previous_state: bool = False,
        animation: bool = False,
        error_style: str = None,
        internal_text: str = "",
        fill_marker: bool = False,
        page: str = "page1",
    ):

        _num_lines, __num_lines = self.get_page(name=page)

        if animation:
            color_num = _num_lines
            line_type = _num_lines
            save_previous_state = True
            xy = self._xy

        if save_previous_state: _num_lines -= 1

        if color_num == "auto": color_num = _num_lines
        if line_type == "auto": line_type = _num_lines

        if not animation:
            x_dataname = self.xname + str(_num_lines) + str(
                save_previous_state) + str(__num_lines) + str(page)
            y_dataname = self.yname + str(_num_lines) + str(
                save_previous_state) + str(__num_lines) + str(page)
        else:
            x_dataname = self.xname + str(_num_lines) + str(
                save_previous_state) + str(page)
            y_dataname = self.yname + str(_num_lines) + str(
                save_previous_state) + str(page)

        x_dataname += internal_text
        y_dataname += internal_text

        if len(np.shape(x)) == 2:
            x_arr = np.array(x)
            x_data, x_data_err = x_arr[:, 0], x_arr[:, 1]
            self.g.SetData(x_dataname, x_data, symerr=x_data_err)
        else:
            x_arr = np.array(x)
            x_data = x_arr
            self.g.SetData(x_dataname, x_data)

        if len(np.shape(y)) == 2:
            y_arr = np.array(y)
            y_data, y_data_err = y_arr[:, 0], y_arr[:, 1]
            self.g.SetData(y_dataname, y_data, symerr=y_data_err)
        else:
            y_arr = np.array(y)
            y_data = y_arr
            self.g.SetData(y_dataname, y_data)

        # self.graph = self.g.Root[name + '/graph1']
        if animation:
            if not self._xy:
                self._xy = xy = self.g.Root[page + '/graph1'].Add('xy')
        else:
            xy = self.g.Root[page + '/graph1'].Add('xy')

        # nn.plotter_progress.g.Root.xyz_file.graph1.xy1.Clone(nn.plotter_progress.g.Root.xyz_file.graph1, 'xy7')
        xy.xData.val = x_dataname
        xy.yData.val = y_dataname
        if marker_type != "auto": xy.marker.val = get_marker_type(marker_type)
        else: xy.marker.val = get_marker_type(line_type)

        if color_num % 2: xy.MarkerFill.color.val = get_line_color(color_num)
        else: xy.MarkerFill.color.val = 'white'

        if fill_marker: xy.MarkerFill.color.val = get_line_color(color_num)

        xy.MarkerLine.color.val = get_line_color(color_num)
        xy.markerSize.val = marker_size
        xy.PlotLine.width.val = '1pt'
        xy.PlotLine.style.val = get_line_type(line_type)
        xy.PlotLine.color.val = get_line_color(color_num)
        xy.PlotLine.hide.val = not plot_line

        if error_style:
            xy.errorStyle.val = error_style
            xy.FillBelow.color.val = get_line_color(color_num)
            xy.FillBelow.transparency.val = int(self.transparency)
            xy.FillAbove.color.val = get_line_color(color_num)
            xy.FillAbove.transparency.val = int(self.transparency)

            # ErrorBarLine style: the original set the colour from
            # get_line_type(), which returns a line style, not a colour.
            xy.ErrorBarLine.color.val = get_line_color(color_num)
            xy.ErrorBarLine.style.val = get_line_type(line_type)
        else:
            xy.errorStyle.val = 'none'

        xy.ErrorBarLine.width.val = '1pt'
        xy.ErrorBarLine.color.val = get_line_color(color_num)
        if self.showkey and key_name_f:
            xy.key.val = self.name_converter(key_name_f)
        if self.showkey and key_name: xy.key.val = key_name

        x_axis = self.graph.x
        y_axis = self.graph.y

        x_axis.label.val = self.storer.get(page + "/xname")  # self.xname
        y_axis.label.val = self.storer.get(page + "/yname")  # self.yname

        x_axis.log.val = self.storer.get(page + "/xlog")  # self.xlog
        y_axis.log.val = self.storer.get(page + "/ylog")  # self.ylog

        x_axis.min.val = self.storer.get(page + "/xmin")  # self.xmin
        x_axis.max.val = self.storer.get(page + "/xmax")  # self.xmax

        y_axis.min.val = self.storer.get(page + "/ymin")  # self.ymin
        y_axis.max.val = self.storer.get(page + "/ymax")  # self.ymax

        _num_lines += 1
        __num_lines += 1
        self.storer.put(_num_lines, name=page + "/_num_lines")
        self.storer.put(__num_lines, name=page + "/__num_lines")

    def export(self,
               filename: str = "output.pdf",
               extension: str = "pdf",
               color: bool = True,
               page: int = 0,
               dpi: int = 100,
               antialias: bool = True,
               quality: int = 85,
               backcolor: str = '#ffffff00',
               pdfdpi: int = 150,
               svgtextastext: bool = False):
        if not filename or not extension:
            print(
                f"{self.internal_name} You have to specify filename and extension!"
            )
            print(
                f"{self.internal_name} For example: filename='my_amazing_figure', extension='pdf'"
            )
            print(
                f"{self.internal_name}              color=True, extension='pdf', quality='85', pdfdpi='150'"
            )
            print(
                f"{self.internal_name} Available extensions: [pdf]/[eps]/[ps]/[svg]/[jpg]/[jpeg]/[bmp]/[png]"
            )
        else:
            self.g.Export(filename,
                          color=color,
                          page=page,
                          dpi=dpi,
                          antialias=antialias,
                          quality=quality,
                          backcolor=backcolor,
                          pdfdpi=pdfdpi,
                          svgtextastext=svgtextastext)

    def save(self, filename=None):
        if not filename:
            print(
                f"{self.internal_name} You have to specify filename! [Labels from Y and X will be added automatically]"
            )
        else:
            if filename.find(".") != -1 or filename.find(":") or filename.find(
                    "\\") or filename.find("*") or filename.find(
                        "/") or filename.find("\\\\"):
                print(
                    f"{self.internal_name} I found forbidden symbols [.]/[:]..."
                )
                filename.replace(".", "").replace(":", "_").replace(
                    "\\\\",
                    "").replace("*", "").replace("/", "_").replace("\\", "")

            # latex reduction
            xname = self.xname.replace("\\italic", "").replace(
                "{",
                "").replace("}", "").replace("_", "").replace("^", "").replace(
                    "\\\\", "").replace("\\",
                                        "").replace("/", "_").replace("*", "")
            yname = self.yname.replace("\\italic", "").replace(
                "{",
                "").replace("}", "").replace("_", "").replace("^", "").replace(
                    "\\\\", "").replace("\\",
                                        "").replace("/", "_").replace("*", "")
            # space reduction
            xname = xname.replace(" ", "")
            yname = yname.replace(" ", "")

            name4saving = filename + "_" + yname + "_" + xname

            if not os.path.exists(name4saving + ".vsz"):
                self.g.Save(name4saving + ".vsz")
            else:
                print(f"{self.internal_name} The file exists!")
                i = 0
                while os.path.exists(name4saving + str(i) + ".vsz"):
                    i += 1
                name4saving += str(i) + ".vsz"
                self.g.Save(name4saving)
                print(f"{self.internal_name} Saved! filename: {name4saving}")
Example #20
 def test_separation(self):
     """
     Test separation procedure
     """
     s = Storer(dump_path=PATH_DUMPS, dump_name=DUMP_NAME, verbose=VERBOSE, compressed=False, separations=1, _test=True)
     s.put(1, name="one")    # 1
     s.put(2, name="two")    # 2
     s.put(3, name="three")  # 3
     s.put(4, name="four")   # 4
     s.dump()                
     s2 = Storer(dump_path=PATH_DUMPS, dump_name=DUMP_NAME, verbose=VERBOSE, compressed=False, _test=True)
     length = len(s2.backup_list)
     assert length == 4, f"Got something different: {length}"
     s._cleanup()
Example #21
import init_bot
import logging  # used below but missing from the original imports
import os
import pytz
import telegram
from datetime import datetime, timedelta
from storer import Storer
from user_info import UserInfo
from telegram.ext import Updater, CommandHandler, CallbackQueryHandler, Job

STORED_FILE = os.getenv('UNI_STORED_FILE', 'unison_bot_shelve.db')

MENU, AWAIT_INPUT_GAME, AWAIT_MEETING_ANSWER = range(3)
state = dict()

users = {}
users_store = Storer(STORED_FILE)
forum_subscribers = dict()

meeting_subscribers = []
komsostav = []

posts_from_forum = []
last_check_new_posts = 0
UPDATE_FORUM_POSTS_TIMEOUT_SEC = 10. * 60
DEL_FORUM_POSTS_TIMEOUT_SEC = 24 * 60. * 60

logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
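
Given the 'unison_bot_shelve.db' default, Storer(STORED_FILE) here is presumably a shelve-backed key-value store. A minimal sketch under that assumption; the store()/restore() method names are hypothetical, since this fragment never calls users_store directly:

import shelve

class Storer:
    """Hypothetical shelve-backed key-value store."""
    def __init__(self, path):
        self.path = path

    def store(self, key, value):
        with shelve.open(self.path) as db:
            db[key] = value

    def restore(self, key):
        with shelve.open(self.path) as db:
            return db.get(key)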
Example #22
 def test_loading_separation_pbz2(self):
     """
     Test separation procedure
     """
     s = Storer(dump_path=PATH_DUMPS, dump_name=DUMP_NAME, verbose=VERBOSE, compressed=True, separations=1, _test=True)
     s.put(1, name="one")    # _
     s.put(2, name="two")    # 0
     s.put(3, name="three")  # 1
     s.put(4, name="four")   # 2
     s.dump()                # 3
     s2 = Storer(dump_path=PATH_DUMPS, dump_name=DUMP_NAME, verbose=VERBOSE, compressed=True, _test=True)
     one = s2.get(name="one")
     assert 1 == one, f"Got something different: {one}"
     two = s2.get(name="two")
     assert 2 == two, f"Got something different: {two}"
     three = s2.get(name="three")
     assert 3 == three, f"Got something different: {three}"
     four = s2.get(name="four")
     assert 4 == four, f"Got something different: {four}"
     s._cleanup()
Example #23
    def test_long_loop_loading(self):
        """
        Get method in loop:
        Checking for not overloading internal data dict
        """
        s = Storer(dump_path=PATH_DUMPS, dump_name=DUMP_NAME, verbose=VERBOSE, compressed=False, separations=10000, _test=True)
        cycles = 1000
        for idx in range(cycles): 
            value = s.get(name=str(idx))
            if not value: s.put(what=idx, name=str(idx))
        s.dump()

        s2 = Storer(dump_path=PATH_DUMPS, dump_name=DUMP_NAME, verbose=VERBOSE, compressed=False, separations=10000, _test=True)
        for idx in range(cycles): 
            value = s2.get(name=str(idx))
            assert value == idx, f"Got something different: {value} != {idx}"
        s._cleanup()
Example #24
class ResourceFinder(object):
    def __init__(self,
                 g_set=None,
                 ts_url=None,
                 base_dir=None,
                 base_iri=None,
                 tmp_dir=None,
                 context_map={}):
        self.g = Graph()
        self.base_dir = base_dir
        self.base_iri = base_iri
        self.storer = Storer(context_map=context_map)
        self.tmp_dir = tmp_dir
        self.name = "SPACIN " + self.__class__.__name__
        self.loaded = set()
        if g_set is not None:
            self.update_graph_set(g_set)
        if ts_url is None:
            self.ts = None
        else:
            self.ts = ConjunctiveGraph('SPARQLUpdateStore')
            self.ts.open((ts_url, ts_url))

    def add_prov_triples_in_filesystem(self, res_iri, prov_entity_type=None):
        if self.base_dir is not None and self.base_iri is not None:
            cur_file_path = find_paths(res_iri, self.base_dir, self.base_iri,
                                       dir_split_number, items_per_file)[1]
            if cur_file_path.endswith("index.json"):
                cur_path = cur_file_path.replace("index.json", "") + "prov"
            else:
                cur_path = cur_file_path[:-5] + os.sep + "prov"

            file_list = []
            if os.path.isdir(cur_path):
                for cur_dir, cur_subdir, cur_files in os.walk(cur_path):
                    for cur_file in cur_files:
                        if cur_file.endswith(".json") and \
                           (prov_entity_type is None or cur_file.startswith(prov_entity_type)):
                            file_list += [cur_dir + os.sep + cur_file]

            for file_path in file_list:
                if file_path not in self.loaded:
                    self.loaded.add(file_path)
                    cur_g = self.storer.load(file_path, tmp_dir=self.tmp_dir)
                    self.add_triples_in_graph(cur_g)

    def add_triples_in_graph(self, g):
        if g is not None:
            for s, p, o in g.triples((None, None, None)):
                self.g.add((s, p, o))

    def update_graph_set(self, g_set):
        for g in g_set.graphs():
            self.add_triples_in_graph(g)

    def retrieve(self, id_dict):
        for id_type in id_dict:
            for id_string in id_dict[id_type]:
                res = self.__id_with_type(id_string, id_type)
                if res is not None:
                    return res

    def retrieve_provenance_agent_from_name(self, string):
        query = """
            SELECT DISTINCT ?pa WHERE {
              ?pa a <%s> ;
                <%s> "%s"
            } LIMIT 1
        """ % (ProvEntity.prov_agent, GraphEntity.name, string)
        return self.__query(query)

    def retrieve_from_orcid(self, string):
        return self.__id_with_type(string, GraphEntity.orcid)

    def retrieve_citing_from_doi(self, string):
        return self.__id_with_type(string.lower(), GraphEntity.doi,
                                   "?res <%s> ?cited" % GraphEntity.cites)

    def retrieve_citing_from_pmid(self, string):
        return self.__id_with_type(string, GraphEntity.pmid,
                                   "?res <%s> ?cited" % GraphEntity.cites)

    def retrieve_citing_from_pmcid(self, string):
        return self.__id_with_type(string, GraphEntity.pmcid,
                                   "?res <%s> ?cited" % GraphEntity.cites)

    def retrieve_citing_from_url(self, string):
        return self.__id_with_type(string.lower(), GraphEntity.url,
                                   "?res <%s> ?cited" % GraphEntity.cites)

    def retrieve_from_doi(self, string):
        return self.__id_with_type(string.lower(), GraphEntity.doi)

    def retrieve_from_pmid(self, string):
        return self.__id_with_type(string, GraphEntity.pmid)

    def retrieve_from_pmcid(self, string):
        return self.__id_with_type(string, GraphEntity.pmcid)

    def retrieve_from_url(self, string):
        return self.__id_with_type(string.lower(), GraphEntity.url)

    def retrieve_from_issn(self, string):
        return self.__id_with_type(string, GraphEntity.issn)

    def retrieve_from_isbn(self, string):
        return self.__id_with_type(string, GraphEntity.isbn)

    def retrieve_issue_from_journal(self, id_dict, issue_id, volume_id):
        if volume_id is None:
            return self.__retrieve_from_journal(id_dict,
                                                GraphEntity.journal_issue,
                                                issue_id)
        else:
            retrieved_volume = self.retrieve_volume_from_journal(
                id_dict, volume_id)
            if retrieved_volume is not None:
                query = """
                    SELECT DISTINCT ?br WHERE {
                        ?br a <%s> ;
                            <%s> <%s> ;
                            <%s> "%s"
                    } LIMIT 1
                """ % (GraphEntity.journal_issue, GraphEntity.part_of,
                       str(retrieved_volume),
                       GraphEntity.has_sequence_identifier, issue_id)
                return self.__query(query)

    def retrieve_volume_from_journal(self, id_dict, volume_id):
        return self.__retrieve_from_journal(id_dict,
                                            GraphEntity.journal_volume,
                                            volume_id)

    def retrieve_br_url(self, res, string):
        return self.__retrieve_res_id_by_type(res, string.lower(),
                                              GraphEntity.url)

    def retrieve_br_doi(self, res, string):
        return self.__retrieve_res_id_by_type(res, string.lower(),
                                              GraphEntity.doi)

    def retrieve_br_pmid(self, res, string):
        return self.__retrieve_res_id_by_type(res, string, GraphEntity.pmid)

    def retrieve_br_pmcid(self, res, string):
        return self.__retrieve_res_id_by_type(res, string, GraphEntity.pmcid)

    def retrieve_last_snapshot(self, prov_subj):
        query = """
            SELECT DISTINCT ?se WHERE {
                ?se <%s> <%s> .
                FILTER NOT EXISTS {?se <%s> ?ca }
            } LIMIT 1
        """ % (ProvEntity.specialization_of, str(prov_subj),
               ProvEntity.was_invalidated_by)
        return self.__query(query)

    def __retrieve_res_id_by_type(self, res, id_string, id_type):
        if id_string is not None:
            query = """
            SELECT DISTINCT ?id WHERE {
                <%s> <%s> ?id .
                ?id <%s> <%s> ;
                    <%s> "%s"
            }""" % (res, GraphEntity.has_identifier,
                    GraphEntity.uses_identifier_scheme, id_type,
                    GraphEntity.has_literal_value, id_string)
            return self.__query(query)

    def __retrieve_from_journal(self, id_dict, part_type, part_seq_id):
        for id_type in id_dict:
            for id_string in id_dict[id_type]:
                query = """
                SELECT DISTINCT ?res WHERE {
                    ?j <%s> ?id .
                    ?id
                        <%s> <%s> ;
                        <%s> "%s" .
                    ?res a <%s> ;
                        <%s>+ ?j ;
                        <%s> "%s"
                }""" % (GraphEntity.has_identifier,
                        GraphEntity.uses_identifier_scheme, id_type,
                        GraphEntity.has_literal_value, id_string, part_type,
                        GraphEntity.part_of,
                        GraphEntity.has_sequence_identifier, part_seq_id)

                # Mirror retrieve(): try each identifier until one resolves.
                res = self.__query(query)
                if res is not None:
                    return res

    def __id_with_type(self, id_string, id_type, extras=""):
        query = """
        SELECT DISTINCT ?res WHERE {
            ?res <%s> ?id .
            ?id
                <%s> <%s> ;
                <%s> "%s" .
                %s
        }""" % (GraphEntity.has_identifier, GraphEntity.uses_identifier_scheme,
                id_type, GraphEntity.has_literal_value, id_string, extras)

        return self.__query(query)

    def __query(self, query):
        if self.ts is not None:
            result = self.ts.query(query)
            for res, in result:
                return res

        # If nothing has been returned, check if there is something
        # in the current graph set
        result = self.g.query(query)
        for res, in result:
            return res
Example No. 33
                            print "\n\nProcess file '%s'\n" % cur_file_path
                            json_object = json.load(fp)
                            crp = CrossrefProcessor(base_iri, context_path, info_dir, json_object,
                                                    ResourceFinder(ts_url=triplestore_url),
                                                    ORCIDFinder(orcid_conf_path))
                            result = crp.process()
                            if result is not None:
                                prov = ProvSet(result, base_iri, context_path, info_dir,
                                               ResourceFinder(base_dir=base_dir, base_iri=base_iri,
                                                              tmp_dir=temp_dir_for_rdf_loading,
                                                              context_map=
                                                              {context_path: context_file_path}))
                                prov.generate_provenance()

                                res_storer = Storer(result,
                                                    context_map={context_path: context_file_path},
                                                    dir_split=dir_split_number,
                                                    n_file_item=items_per_file)
                                res_storer.upload_and_store(
                                    base_dir, triplestore_url, base_iri, context_path,
                                    temp_dir_for_rdf_loading)
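                                # Note: upload_and_store() both serialises the
                                # resources to disk and pushes them to the
                                # triplestore, whereas store_all() below only
                                # writes the provenance graphs to disk.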

                                prov_storer = Storer(prov,
                                                     context_map={context_path: context_file_path},
                                                     dir_split=dir_split_number,
                                                     n_file_item=items_per_file)
                                prov_storer.store_all(
                                    base_dir, base_iri, context_path,
                                    temp_dir_for_rdf_loading)

                                dset_handler = DatasetHandler(triplestore_url_real,
                                                              context_path,
Example No. 34
                            if result is not None:
                                prov = ProvSet(
                                    result, base_iri, context_path, info_dir,
                                    ResourceFinder(
                                        base_dir=base_dir,
                                        base_iri=base_iri,
                                        tmp_dir=temp_dir_for_rdf_loading,
                                        context_map={
                                            context_path: context_file_path
                                        }))
                                prov.generate_provenance()

                                res_storer = Storer(result,
                                                    context_map={
                                                        context_path:
                                                        context_file_path
                                                    },
                                                    dir_split=dir_split_number,
                                                    n_file_item=items_per_file)
                                res_storer.upload_and_store(
                                    base_dir, triplestore_url, base_iri,
                                    context_path, temp_dir_for_rdf_loading)

                                prov_storer = Storer(
                                    prov,
                                    context_map={
                                        context_path: context_file_path
                                    },
                                    dir_split=dir_split_number,
                                    n_file_item=items_per_file)
                                prov_storer.store_all(
Example No. 35
    def __init__(self):
        self.storer = Storer(db="Quora", collection="user")
        self.parser = Parser("http://www.quora.com/profile/")
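        # `db` and `collection` suggest a MongoDB-backed Storer (pymongo-style
        # naming); the backend is not shown in this snippet, so this is only
        # an inference.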
Example No. 36
import json
import logging

import requests

from storer import Storer
from telegram.ext import CommandHandler, Filters, MessageHandler, Updater

TOKEN = ''  # TG bot token
VK_TOKEN = ''  # VK access token
PUBLIC_ID = 0  # vk.com/wall<PUBLIC_ID>_...
INTERVAL = 60  # Seconds
IGNORE_TAGS = []  # ['#IGNORE', '#HASHTAGS']

# Enable logging
logging.basicConfig(format='[%(asctime)s][%(levelname)s] - %(message)s',
                    level=logging.INFO)
logger = logging.getLogger(__name__)

# Database
storer = Storer('bot.db')


def vk(method, params):
    params['access_token'] = VK_TOKEN
    params['v'] = 5.74
    q = requests.post('https://api.vk.com/method/' + method, data=params)
    return json.loads(q.text)
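
# Example call (hypothetical parameters): fetching a community wall via
#   vk('wall.get', {'owner_id': -PUBLIC_ID, 'count': 10})
# VK identifies communities by negative owner_id values.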


def start(bot, update):
    if storer.restore('cid') is None:
        update.message.reply_text(
            'To get started, add me to the channel\'s administrators, '
            'then forward any message from the channel to this dialog')
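

# A plausible way to wire the handler up, sketched for a pre-v12
# python-telegram-bot API (matching the `def start(bot, update)` signature);
# the original main() is not shown in this snippet.
def main():
    updater = Updater(TOKEN)
    updater.dispatcher.add_handler(CommandHandler('start', start))
    # A MessageHandler for the forwarded channel message mentioned in start()
    # would be registered here; its callback is not part of the snippet.
    updater.start_polling()
    updater.idle()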
Example No. 37
import os

from rdflib import ConjunctiveGraph, Graph

# Project-local names used below (GraphEntity, ProvEntity, Storer, find_paths,
# dir_split_number, items_per_file) are assumed importable from the
# surrounding package; their module paths are not shown in this snippet.


class ResourceFinder(object):

    def __init__(self, g_set=None, ts_url=None, base_dir=None, base_iri=None,
                 tmp_dir=None, context_map=None):
        self.g = Graph()
        self.base_dir = base_dir
        self.base_iri = base_iri
        # `context_map=None` avoids sharing a mutable default dict between
        # instances; fall back to a fresh one here.
        self.storer = Storer(context_map=context_map or {})
        self.tmp_dir = tmp_dir
        self.name = "SPACIN " + self.__class__.__name__
        self.loaded = set()
        if g_set is not None:
            self.update_graph_set(g_set)
        if ts_url is None:
            self.ts = None
        else:
            self.ts = ConjunctiveGraph('SPARQLUpdateStore')
            self.ts.open((ts_url, ts_url))

    def add_prov_triples_in_filesystem(self, res_iri, prov_entity_type=None):
        if self.base_dir is not None and self.base_iri is not None:
            cur_file_path = find_paths(res_iri, self.base_dir, self.base_iri,
                                       dir_split_number, items_per_file)[1]
            if cur_file_path.endswith("index.json"):
                cur_path = cur_file_path.replace("index.json", "") + "prov"
            else:
                cur_path = cur_file_path[:-5] + os.sep + "prov"

            file_list = []
            if os.path.isdir(cur_path):
                for cur_dir, cur_subdir, cur_files in os.walk(cur_path):
                    for cur_file in cur_files:
                        if cur_file.endswith(".json") and \
                           (prov_entity_type is None or cur_file.startswith(prov_entity_type)):
                            file_list += [cur_dir + os.sep + cur_file]

            for file_path in file_list:
                if file_path not in self.loaded:
                    self.loaded.add(file_path)
                    cur_g = self.storer.load(file_path, tmp_dir=self.tmp_dir)
                    self.add_triples_in_graph(cur_g)

    def add_triples_in_graph(self, g):
        if g is not None:
            for s, p, o in g.triples((None, None, None)):
                self.g.add((s, p, o))

    def update_graph_set(self, g_set):
        for g in g_set.graphs():
            self.add_triples_in_graph(g)

    def retrieve(self, id_dict):
        for id_type in id_dict:
            for id_string in id_dict[id_type]:
                res = self.__id_with_type(id_string, id_type)
                if res is not None:
                    return res

    def retrieve_provenance_agent_from_name(self, string):
        query = """
            SELECT DISTINCT ?pa WHERE {
              ?pa a <%s> ;
                <%s> "%s"
            } LIMIT 1
        """ % (ProvEntity.prov_agent,
               GraphEntity.name, string)
        return self.__query(query)

    def retrieve_from_orcid(self, string):
        return self.__id_with_type(string, GraphEntity.orcid)

    def retrieve_citing_from_doi(self, string):
        return self.__id_with_type(
            string.lower(), GraphEntity.doi, "?res <%s> ?cited" % GraphEntity.cites)

    def retrieve_citing_from_pmid(self, string):
        return self.__id_with_type(
            string, GraphEntity.pmid, "?res <%s> ?cited" % GraphEntity.cites)

    def retrieve_citing_from_pmcid(self, string):
        return self.__id_with_type(
            string, GraphEntity.pmcid, "?res <%s> ?cited" % GraphEntity.cites)

    def retrieve_citing_from_url(self, string):
        return self.__id_with_type(
            string.lower(), GraphEntity.url, "?res <%s> ?cited" % GraphEntity.cites)

    def retrieve_from_doi(self, string):
        return self.__id_with_type(string.lower(), GraphEntity.doi)

    def retrieve_from_pmid(self, string):
        return self.__id_with_type(string, GraphEntity.pmid)

    def retrieve_from_pmcid(self, string):
        return self.__id_with_type(string, GraphEntity.pmcid)

    def retrieve_from_url(self, string):
        return self.__id_with_type(string.lower(), GraphEntity.url)

    def retrieve_from_issn(self, string):
        return self.__id_with_type(string, GraphEntity.issn)

    def retrieve_from_isbn(self, string):
        return self.__id_with_type(string, GraphEntity.isbn)

    def retrieve_issue_from_journal(self, id_dict, issue_id, volume_id):
        if volume_id is None:
            return self.__retrieve_from_journal(id_dict, GraphEntity.journal_issue, issue_id)
        else:
            retrieved_volume = self.retrieve_volume_from_journal(id_dict, volume_id)
            if retrieved_volume is not None:
                query = """
                    SELECT DISTINCT ?br WHERE {
                        ?br a <%s> ;
                            <%s> <%s> ;
                            <%s> "%s"
                    } LIMIT 1
                """ % (GraphEntity.journal_issue,
                       GraphEntity.part_of, str(retrieved_volume),
                       GraphEntity.has_sequence_identifier, issue_id)
                return self.__query(query)

    def retrieve_volume_from_journal(self, id_dict, volume_id):
        return self.__retrieve_from_journal(id_dict, GraphEntity.journal_volume, volume_id)

    def retrieve_br_url(self, res, string):
        return self.__retrieve_res_id_by_type(res, string.lower(), GraphEntity.url)

    def retrieve_br_doi(self, res, string):
        return self.__retrieve_res_id_by_type(res, string.lower(), GraphEntity.doi)

    def retrieve_br_pmid(self, res, string):
        return self.__retrieve_res_id_by_type(res, string, GraphEntity.pmid)

    def retrieve_br_pmcid(self, res, string):
        return self.__retrieve_res_id_by_type(res, string, GraphEntity.pmcid)

    def retrieve_last_snapshot(self, prov_subj):
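        # For illustration: assuming ProvEntity maps specialization_of /
        # was_invalidated_by to the PROV-O terms prov:specializationOf /
        # prov:wasInvalidatedBy, this selects the snapshot of `prov_subj`
        # that has not yet been invalidated, e.g.:
        #   SELECT DISTINCT ?se WHERE {
        #       ?se <http://www.w3.org/ns/prov#specializationOf> <.../br/1> .
        #       FILTER NOT EXISTS { ?se <http://www.w3.org/ns/prov#wasInvalidatedBy> ?ca }
        #   } LIMIT 1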
        query = """
            SELECT DISTINCT ?se WHERE {
                ?se <%s> <%s> .
                FILTER NOT EXISTS {?se <%s> ?ca }
            } LIMIT 1
        """ % (ProvEntity.specialization_of, str(prov_subj),
               ProvEntity.was_invalidated_by)
        return self.__query(query)

    def __retrieve_res_id_by_type(self, res, id_string, id_type):
        if id_string is not None:
            query = """
            SELECT DISTINCT ?id WHERE {
                <%s> <%s> ?id .
                ?id <%s> <%s> ;
                    <%s> "%s"
            }""" % (
                res, GraphEntity.has_identifier,
                GraphEntity.uses_identifier_scheme, id_type,
                GraphEntity.has_literal_value, id_string)
            return self.__query(query)

    def __retrieve_from_journal(self, id_dict, part_type, part_seq_id):
        for id_type in id_dict:
            for id_string in id_dict[id_type]:
                query = """
                SELECT DISTINCT ?res WHERE {
                    ?j <%s> ?id .
                    ?id
                        <%s> <%s> ;
                        <%s> "%s" .
                    ?res a <%s> ;
                        <%s>+ ?j ;
                        <%s> "%s"
                }""" % (
                    GraphEntity.has_identifier,
                    GraphEntity.uses_identifier_scheme, id_type,
                    GraphEntity.has_literal_value, id_string,
                    part_type,
                    GraphEntity.part_of,
                    GraphEntity.has_sequence_identifier, part_seq_id
                )

                # Mirror retrieve(): try each identifier until one resolves.
                res = self.__query(query)
                if res is not None:
                    return res

    def __id_with_type(self, id_string, id_type, extras=""):
        query = """
        SELECT DISTINCT ?res WHERE {
            ?res <%s> ?id .
            ?id
                <%s> <%s> ;
                <%s> "%s" .
                %s
        }""" % (
            GraphEntity.has_identifier,
            GraphEntity.uses_identifier_scheme, id_type,
            GraphEntity.has_literal_value, id_string, extras)

        return self.__query(query)

    def __query(self, query):
        if self.ts is not None:
            result = self.ts.query(query)
            for res, in result:
                return res

        # If nothing has been returned, check if there is something
        # in the current graph set
        result = self.g.query(query)
        for res, in result:
            return res
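
A minimal usage sketch of the ResourceFinder above, assuming an
OpenCitations-style corpus layout (all paths, URLs and the DOI value are
hypothetical):

finder = ResourceFinder(ts_url="http://localhost:3030/oc/sparql",
                        base_dir="corpus/",
                        base_iri="https://w3id.org/oc/corpus/",
                        tmp_dir="/tmp")
br = finder.retrieve_from_doi("10.1000/182")  # hypothetical DOI
if br is not None:
    # Pull the provenance files for this resource into the local graph,
    # then look up its latest (non-invalidated) snapshot.
    finder.add_prov_triples_in_filesystem(str(br))
    last_snapshot = finder.retrieve_last_snapshot(br)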
Example No. 38
    def __post_init__(self):
        self.storer = Storer(exit_dump=False)

        self.g = veusz.Embedded(name=self.title, hidden=self.hidden)
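        # `veusz.Embedded` is presumably the embedding interface
        # (veusz.embed.Embedded); `hidden` keeps the plot window invisible
        # when set.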
        self.g.EnableToolbar()
        self.init_pages()
Example No. 39
from telegram import User
from telegram.ext import Updater, CommandHandler
import logging
import checker
import os
from time import time, ctime
from storer import Storer
from cardinfo import CardInfo, ThresholdExceedListener
from userinfo import UserInfo

STORED_FILE = os.getenv('STRELKA_STORED_FILE', 'strelka_bot_shelve.db')
TOKEN_FILENAME = os.getenv('STRELKA_TOKEN_FILE', 'token.lst')
BALANCE_CHECK_INTERVAL_SEC = 3600  # 1 hour

users = {}
storer = Storer(STORED_FILE)
job_queue = None
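# Depending on the python-telegram-bot version, the periodic balance check
# could be scheduled via the JobQueue, e.g. (hypothetical callback name):
#   job_queue.run_repeating(check_balances, interval=BALANCE_CHECK_INTERVAL_SEC)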

# Enable Logging
logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO)

logger = logging.getLogger(__name__)

def get_description():
    return """/help - Show help
/getcardbalance - Returns balance for specified card
/addcard - Add a card to the list of registered cards
/removecard - Remove a card from the list of registered cards
/getcards - Returns balance for all registered cards
Example No. 40
    def test_get_item_bpz2(self):
        """
        Test the get-item procedure.
        """
        s = Storer(dump_path=PATH_DUMPS, dump_name=DUMP_NAME, verbose=VERBOSE, _test=True)
        s.put(1, name="one")
        s.put(2, name="two")
        three = s.get("three")
        assert three == False  # "Should be False!"
        s.put(3, name="three")
        s.dump()
        # after dump(), the newly put value is retrievable from the storer
        three = s.get("three")
        assert three == 3, f"got something different: [{three}]"  # "Should be 3!"
        s._cleanup()