def main():
    """Scrape the current afisha and this month's premieres; persist both datasets."""
    try:
        # Movies currently listed on the afisha page.
        afisha_links = WebSource(config.URL_AFISHA_VLG).get_links_afisha()
        afisha_movies = [WebSource(url).get_info_about_movie() for url in afisha_links]
        Storer('database.txt').save('afisha', afisha_movies)

        # Premieres, excluding anything already present on the afisha.
        premiere_links = [
            url for url in WebSource(url_premieres()).get_links_premieres()
            if url not in afisha_links
        ]
        premiere_movies = [WebSource(url).get_info_about_movie() for url in premiere_links]
        Storer('database.txt').save('premieres', premiere_movies)
    except Exception as err:
        logging.error(err)
        time.sleep(5)
        print("Error!")
def __init__(self, tp_url_real, context_path, context_file_path, base_iri, base_dir, info_dir, dataset_home, tmp_dir, triplestore_url=None):
    """Record paths/IRIs and prepare the Storer used to persist dataset metadata.

    :param tp_url_real: IRI of the public SPARQL endpoint advertised in metadata
    :param context_path: IRI key mapped onto the local JSON-LD context file
    :param context_file_path: filesystem path of the JSON-LD context file
    :param triplestore_url: update endpoint; None means store to file only
    """
    self.tp_url = triplestore_url
    self.base_iri = base_iri
    self.base_dir = base_dir
    self.info_dir = info_dir
    self.context_path = context_path
    self.dataset_home = URIRef(dataset_home)
    self.tmp_dir = tmp_dir
    self.tp_res = URIRef(tp_url_real)
    self.repok = Reporter(prefix="[DatasetHandler: INFO] ")
    self.reperr = Reporter(prefix="[DatasetHandler: ERROR] ")
    self.st = Storer(context_map={context_path: context_file_path},
                     repok=self.repok, reperr=self.reperr)
    # Preface query drops any stale dcterms:modified triple on dataset
    # resources before the updated metadata is stored.
    self.st.set_preface_query(
        u"DELETE { ?res <%s> ?date } WHERE { ?res a <%s> ; <%s> ?date }" %
        (str(DatasetHandler.modified), str(
            DatasetHandler.dataset), str(DatasetHandler.modified)))
class Crawler(object):
    """Walks Quora user profiles and persists every parsed item."""

    def __init__(self):
        self.storer = Storer(db="Quora", collection="user")
        self.parser = Parser("http://www.quora.com/profile/")

    def crawl(self, user_id, depth):
        # parse_user yields items one by one; store each as it arrives.
        for parsed in self.parser.parse_user(user_id, depth):
            self.storer.save(parsed)
def setUp(self):
    """Build a solved sudoku fixture and two Storer instances (txt and csv)."""
    self.file_name = "testeo"
    self.extension_txt = "txt"
    self.extension_csv = "csv"
    # A complete, valid 9x9 solution: a tuple of nine rows of digit strings.
    self.matrix = (
        ['9', '3', '8', '4', '7', '5', '1', '6', '2'],
        ['1', '4', '6', '9', '3', '2', '7', '5', '8'],
        ['7', '5', '2', '8', '6', '1', '9', '4', '3'],
        ['8', '9', '1', '6', '4', '3', '2', '7', '5'],
        ['3', '6', '5', '2', '9', '7', '8', '1', '4'],
        ['4', '2', '7', '1', '5', '8', '6', '3', '9'],
        ['5', '8', '3', '7', '2', '6', '4', '9', '1'],
        ['2', '7', '9', '3', '1', '4', '5', '8', '6'],
        ['6', '1', '4', '5', '8', '9', '3', '2', '7'],
    )
    self.store01 = Storer(self.matrix, self.file_name, self.extension_txt)
    self.store02 = Storer(self.matrix, self.file_name, self.extension_csv)
def setUp(self):
    """Build a solved sudoku fixture and two Storer instances (txt and csv)."""
    self.file_name = "testeo"
    self.extension_txt = "txt"
    self.extension_csv = "csv"
    # A complete, valid 9x9 solution: a tuple of nine rows of digit strings.
    self.matrix = (
        ['9', '3', '8', '4', '7', '5', '1', '6', '2'],
        ['1', '4', '6', '9', '3', '2', '7', '5', '8'],
        ['7', '5', '2', '8', '6', '1', '9', '4', '3'],
        ['8', '9', '1', '6', '4', '3', '2', '7', '5'],
        ['3', '6', '5', '2', '9', '7', '8', '1', '4'],
        ['4', '2', '7', '1', '5', '8', '6', '3', '9'],
        ['5', '8', '3', '7', '2', '6', '4', '9', '1'],
        ['2', '7', '9', '3', '1', '4', '5', '8', '6'],
        ['6', '1', '4', '5', '8', '9', '3', '2', '7'],
    )
    self.store01 = Storer(self.matrix, self.file_name, self.extension_txt)
    self.store02 = Storer(self.matrix, self.file_name, self.extension_csv)
def __init__(self, input_dir, output_dir=None, tmp_dir=None):
    """Remember the working directories and prepare a Storer plus reporters.

    :param input_dir: directory scanned for input files
    :param output_dir: optional directory where results are written
    :param tmp_dir: optional scratch directory handed to the Storer
    """
    self.input_dir = input_dir
    self.output_dir = output_dir
    self.tmp_dir = tmp_dir
    self.storer = Storer()
    # Reporters are prefixed with the concrete subclass name for clearer logs.
    self.name = self.__class__.__name__
    self.repok = Reporter(prefix="[%s - INFO] " % self.name)
    self.repok.new_article()
    self.reper = Reporter(prefix="[%s - ERROR] " % self.name)
    self.reper.new_article()
def handle_start(message):
    # Entry point of the bot conversation: route the user depending on
    # whether Storer.check_user() matches them in userdata.txt.
    # NOTE(review): the branches look inverted — a user for whom
    # check_user() is False gets the generic choice menu, while a user for
    # whom it is True is (re-)registered via save_user() and asked for a
    # template name. Confirm against check_user()'s contract.
    if not Storer('userdata.txt').check_user(str(message.chat.id)):
        msg = bot.send_message(
            message.chat.id, 'Выбирайте', reply_markup=markups.choice_buttons1
        )
        bot.register_next_step_handler(msg, ask_run_or_template)
    else:
        Storer('userdata.txt').save_user(str(message.chat.id))
        msg = bot.send_message(message.chat.id, 'Введите название шаблона')
        bot.register_next_step_handler(msg, ask_template)
def test_initialization_pkl(self):
    """
    Test initialization procedure
    Expected one backup file after dump procedure
    """
    first = Storer(dump_path=PATH_DUMPS,
                   dump_name=DUMP_NAME,
                   verbose=VERBOSE,
                   compressed=False,
                   _test=True)
    first.put(1, name="one")
    first.dump()
    # A fresh instance pointed at the same dump must see exactly one backup.
    second = Storer(dump_path=PATH_DUMPS,
                    dump_name=DUMP_NAME,
                    verbose=VERBOSE,
                    compressed=False,
                    _test=True)
    one = len(second.backup_list)
    assert one == 1, f"Got something different: {one}"
    first._cleanup()
class Checker(object):
    """Walks an input directory, loads each file as RDF and reports the result.

    Every file is parsed through a Storer; the resulting graph is either
    serialized into the log (when no output_dir is given) or written as TriG
    into output_dir with a "converted_" prefix.
    """

    def __init__(self, input_dir, output_dir=None, tmp_dir=None):
        """
        :param input_dir: directory (walked recursively) with candidate RDF files
        :param output_dir: target for converted files; None logs the TriG instead
        :param tmp_dir: scratch directory handed to Storer.load()
        """
        self.input_dir = input_dir
        self.output_dir = output_dir
        self.tmp_dir = tmp_dir
        self.storer = Storer()
        # Reporters are prefixed with the concrete class name for clearer logs.
        self.name = self.__class__.__name__
        self.repok = Reporter(prefix="[%s - INFO] " % self.name)
        self.repok.new_article()
        self.reper = Reporter(prefix="[%s - ERROR] " % self.name)
        self.reper.new_article()

    def process(self):
        """Load each file under input_dir as RDF; convert or log it, reporting errors."""
        for cur_dir, cur_subdir, cur_files in os.walk(self.input_dir):
            for cur_file in cur_files:
                self.repok.new_article()
                self.reper.new_article()
                cur_rdf_path = cur_dir + os.sep + cur_file
                try:
                    self.repok.add_sentence("Processing '%s'" % cur_rdf_path)
                    g = self.storer.load(cur_rdf_path, tmp_dir=self.tmp_dir)
                    if self.output_dir is None:
                        self.repok.add_sentence(
                            "The RDF graph has been converted in TRIG as follows:\n%s"
                            % g.serialize(format="trig"))
                    else:
                        if not os.path.exists(self.output_dir):
                            os.makedirs(self.output_dir)
                        output_file = (self.output_dir + os.sep +
                                       "converted_" + cur_file + ".ttl")
                        # BUG FIX: the original passed a two-item tuple to a
                        # single-"%s" format string, which raises TypeError
                        # (masked by the broad except below). Serialize first,
                        # then report the destination path.
                        g.serialize(output_file, format="trig")
                        self.repok.add_sentence(
                            "The RDF graph has been stored in %s" % output_file)
                except Exception:
                    # BUG FIX: fill the '%s' placeholder with the offending path.
                    self.reper.add_sentence(
                        "The file '%s' doesn't contain RDF statements"
                        % cur_rdf_path, False)
def test_creating_pbz2(self):
    """
    Test paths of the created instance (PBZ2)
    """
    storer = Storer(dump_path=PATH_DUMPS,
                    dump_name=DUMP_NAME,
                    verbose=VERBOSE,
                    _test=True)
    storer.put(1, name="one")
    storer.dump()
    # The dump must land at <dump dir>/<dump name><PBZ2 suffix>.
    expected_path = os.path.expanduser(PATH_DUMPS) + DUMP_NAME + PBZ2
    assert os.path.exists(expected_path)
    storer._cleanup()
def test_backup_dumb_pbz2(self):
    """
    Test backup creating
    """
    storer = Storer(dump_path=PATH_DUMPS,
                    dump_name=DUMP_NAME,
                    verbose=VERBOSE,
                    _test=True)
    storer.put(1, name="one")
    storer.backup()
    # backup() must write into the dedicated backup directory.
    expected_path = os.path.expanduser(PATH_DUMPS_BACKUP) + DUMP_NAME + PBZ2
    assert os.path.exists(expected_path)
    storer._cleanup()
def add_genres(message):
    """Append the chosen genre to the active template, then offer next actions."""
    chosen_genre = message.text
    Storer('userdata.txt').add_genre(str(message.chat.id), template, chosen_genre)
    msg = bot.send_message(
        message.chat.id,
        'Выбирайте',
        reply_markup=markups.choice_buttons2,
    )
    bot.register_next_step_handler(msg, add_genre_or_finish)
def ask_template(message):
    """Record the template name the user typed and move on to genre selection."""
    # The template name is shared with the follow-up handlers via a global.
    global template
    template = message.text
    Storer('userdata.txt').add_template(str(message.chat.id), template)
    msg = bot.send_message(
        message.chat.id,
        'Добавьте жанры в шаблон',
        reply_markup=markups.choice_genres,
    )
    bot.register_next_step_handler(msg, add_genres)
def __init__(self, input_dir, output_dir=None, tmp_dir=None):
    """Remember the working directories and prepare a Storer plus reporters.

    :param input_dir: directory scanned for input files
    :param output_dir: optional directory where results are written
    :param tmp_dir: optional scratch directory handed to the Storer
    """
    self.input_dir = input_dir
    self.output_dir = output_dir
    self.tmp_dir = tmp_dir
    self.storer = Storer()
    # Reporters are prefixed with the concrete subclass name for clearer logs.
    self.name = self.__class__.__name__
    self.repok = Reporter(prefix="[%s - INFO] " % self.name)
    self.repok.new_article()
    self.reper = Reporter(prefix="[%s - ERROR] " % self.name)
    self.reper.new_article()
def findMovies(userid):
    """Send the user every stored movie matching their saved preferences.

    Checks the current afisha first, then this month's premieres; a fallback
    message is sent for each list that yields no matches.
    """
    usersets = Storer('userdata.txt').get_usersets(str(userid))

    afisha = Storer('database.txt').get_data('afisha')
    afisha_movies = get_info(usersets, afisha)
    if afisha_movies:
        for movie in afisha_movies:
            bot.send_message(userid, movie)
    else:
        bot.send_message(userid, "Из текущего репертуара подходящих фильмов нет")
    bot.send_message(userid, "Обзор среди премьер текущего месяца: ")
    time.sleep(1)

    premieres = Storer('database.txt').get_data('premieres')
    premieres_movies = get_info(usersets, premieres)
    # BUG FIX: the original tested `premieres` (the raw stored data) instead
    # of the filtered `premieres_movies`, so when no premiere matched, the
    # empty list iterated silently and the "nothing found" message was skipped.
    if premieres_movies:
        for movie in premieres_movies:
            bot.send_message(userid, movie)
    else:
        bot.send_message(userid, "Увы, и среди премьер подходящих фильмов нет")
    bot.send_message(userid, 'Готово', reply_markup=markups.start_markup)
def __init__(self, g_set=None, ts_url=None, base_dir=None, base_iri=None,
             tmp_dir=None, context_map=None):
    """Prepare the processor's graph, Storer and (optionally) a triplestore link.

    :param g_set: graph set merged into self.g via update_graph_set()
    :param ts_url: SPARQL update endpoint; None disables triplestore access
    :param context_map: JSON-LD context mapping handed to the Storer
    """
    # BUG FIX: the default was a mutable dict literal shared across every
    # instance; None plus a per-call literal keeps the same behaviour
    # (callers passing nothing still get an empty mapping) without sharing.
    if context_map is None:
        context_map = {}
    self.g = Graph()
    self.base_dir = base_dir
    self.base_iri = base_iri
    self.storer = Storer(context_map=context_map)
    self.tmp_dir = tmp_dir
    self.name = "SPACIN " + self.__class__.__name__
    self.loaded = set()
    if g_set is not None:
        self.update_graph_set(g_set)
    if ts_url is None:
        self.ts = None
    else:
        self.ts = ConjunctiveGraph('SPARQLUpdateStore')
        self.ts.open((ts_url, ts_url))
def __init__(self, g_set=None, ts_url=None, base_dir=None, base_iri=None,
             tmp_dir=None, context_map=None):
    """Prepare the processor's graph, Storer and (optionally) a triplestore link.

    :param g_set: graph set merged into self.g via update_graph_set()
    :param ts_url: SPARQL update endpoint; None disables triplestore access
    :param context_map: JSON-LD context mapping handed to the Storer
    """
    # BUG FIX: the default was a mutable dict literal shared across every
    # instance; None plus a per-call literal keeps the same behaviour
    # (callers passing nothing still get an empty mapping) without sharing.
    if context_map is None:
        context_map = {}
    self.g = Graph()
    self.base_dir = base_dir
    self.base_iri = base_iri
    self.storer = Storer(context_map=context_map)
    self.tmp_dir = tmp_dir
    self.name = "SPACIN " + self.__class__.__name__
    self.loaded = set()
    if g_set is not None:
        self.update_graph_set(g_set)
    if ts_url is None:
        self.ts = None
    else:
        self.ts = ConjunctiveGraph('SPARQLUpdateStore')
        self.ts.open((ts_url, ts_url))
def __init__(self, tp_url_real, context_path, context_file_path, base_iri, base_dir, info_dir, dataset_home, tmp_dir, triplestore_url=None):
    """Record paths/IRIs and prepare the Storer used to persist dataset metadata.

    :param tp_url_real: IRI of the public SPARQL endpoint advertised in metadata
    :param context_path: IRI key mapped onto the local JSON-LD context file
    :param context_file_path: filesystem path of the JSON-LD context file
    :param triplestore_url: update endpoint; None means store to file only
    """
    self.tp_url = triplestore_url
    self.base_iri = base_iri
    self.base_dir = base_dir
    self.info_dir = info_dir
    self.context_path = context_path
    self.dataset_home = URIRef(dataset_home)
    self.tmp_dir = tmp_dir
    self.tp_res = URIRef(tp_url_real)
    self.repok = Reporter(prefix="[DatasetHandler: INFO] ")
    self.reperr = Reporter(prefix="[DatasetHandler: ERROR] ")
    self.st = Storer(context_map={context_path: context_file_path},
                     repok=self.repok, reperr=self.reperr)
    # Preface query drops any stale dcterms:modified triple on dataset
    # resources before the updated metadata is stored.
    self.st.set_preface_query(
        u"DELETE { ?res <%s> ?date } WHERE { ?res a <%s> ; <%s> ?date }" %
        (str(DatasetHandler.modified), str(DatasetHandler.dataset),
         str(DatasetHandler.modified)))
class TestStorerClassAndMethods(unittest.TestCase): def setUp(self): self.file_name = "testeo" self.extension_txt = "txt" self.extension_csv = "csv" self.matrix = ['9','3','8','4','7','5','1','6','2'],['1','4','6','9','3','2','7','5','8'],['7','5','2','8','6','1','9','4','3'],\ ['8','9','1','6','4','3','2','7','5'],['3','6','5','2','9','7','8','1','4'],['4','2','7','1','5','8','6','3','9'],\ ['5','8','3','7','2','6','4','9','1'],['2','7','9','3','1','4','5','8','6'],['6','1','4','5','8','9','3','2','7'] self.store01 = Storer(self.matrix, self.file_name, self.extension_txt) self.store02 = Storer(self.matrix, self.file_name, self.extension_csv) def test_storer_receive_righ_number_of_elements_from_1_to_9_be_stored( self): num_elements = self.store01.verify_right_matrix() self.assertEqual(81, num_elements) def test_a_sudoku_solution_is_storer_in_txt_format(self): self.store01.save_matrix_to_file() self.assertTrue( self.file_exist(self.store01.sudoku_file, self.store01.sudoku_extension)) def test_a_sudoku_solution_is_storer_in_csv_format(self): self.store02.save_matrix_to_file() self.assertTrue( self.file_exist(self.store02.sudoku_file, self.store02.sudoku_extension)) def test_if_the_file_already_exist_an_alert_is_displayed(self): error = self.store02.save_matrix_to_file() self.assertEqual("File already exist", error) def file_exist(self, f_name, extension_file): """ This def is auxiliar to verify that the new file was created properly. """ try: f = open(f_name + "." + extension_file, 'r') f.read() f.close() return True except IOError: return IOError
class Checker(object):
    """Walks an input directory, loads each file as RDF and reports the result.

    Every file is parsed through a Storer; the resulting graph is either
    serialized into the log (when no output_dir is given) or written as TriG
    into output_dir with a "converted_" prefix.
    """

    def __init__(self, input_dir, output_dir=None, tmp_dir=None):
        """
        :param input_dir: directory (walked recursively) with candidate RDF files
        :param output_dir: target for converted files; None logs the TriG instead
        :param tmp_dir: scratch directory handed to Storer.load()
        """
        self.input_dir = input_dir
        self.output_dir = output_dir
        self.tmp_dir = tmp_dir
        self.storer = Storer()
        # Reporters are prefixed with the concrete class name for clearer logs.
        self.name = self.__class__.__name__
        self.repok = Reporter(prefix="[%s - INFO] " % self.name)
        self.repok.new_article()
        self.reper = Reporter(prefix="[%s - ERROR] " % self.name)
        self.reper.new_article()

    def process(self):
        """Load each file under input_dir as RDF; convert or log it, reporting errors."""
        for cur_dir, cur_subdir, cur_files in os.walk(self.input_dir):
            for cur_file in cur_files:
                self.repok.new_article()
                self.reper.new_article()
                cur_rdf_path = cur_dir + os.sep + cur_file
                try:
                    self.repok.add_sentence("Processing '%s'" % cur_rdf_path)
                    g = self.storer.load(cur_rdf_path, tmp_dir=self.tmp_dir)
                    if self.output_dir is None:
                        self.repok.add_sentence(
                            "The RDF graph has been converted in TRIG as follows:\n%s"
                            % g.serialize(format="trig"))
                    else:
                        if not os.path.exists(self.output_dir):
                            os.makedirs(self.output_dir)
                        output_file = (self.output_dir + os.sep +
                                       "converted_" + cur_file + ".ttl")
                        # BUG FIX: the original passed a two-item tuple to a
                        # single-"%s" format string, which raises TypeError
                        # (masked by the broad except below). Serialize first,
                        # then report the destination path.
                        g.serialize(output_file, format="trig")
                        self.repok.add_sentence(
                            "The RDF graph has been stored in %s" % output_file)
                except Exception:
                    # BUG FIX: fill the '%s' placeholder with the offending path.
                    self.reper.add_sentence(
                        "The file '%s' doesn't contain RDF statements"
                        % cur_rdf_path, False)
class TestStorerClassAndMethods(unittest.TestCase): def setUp(self): self.file_name = "testeo" self.extension_txt = "txt" self.extension_csv = "csv" self.matrix = ['9','3','8','4','7','5','1','6','2'],['1','4','6','9','3','2','7','5','8'],['7','5','2','8','6','1','9','4','3'],\ ['8','9','1','6','4','3','2','7','5'],['3','6','5','2','9','7','8','1','4'],['4','2','7','1','5','8','6','3','9'],\ ['5','8','3','7','2','6','4','9','1'],['2','7','9','3','1','4','5','8','6'],['6','1','4','5','8','9','3','2','7'] self.store01 = Storer(self.matrix, self.file_name, self.extension_txt) self.store02 = Storer(self.matrix, self.file_name, self.extension_csv) def test_storer_receive_righ_number_of_elements_from_1_to_9_be_stored(self): num_elements = self.store01.verify_right_matrix() self.assertEqual (81, num_elements) def test_a_sudoku_solution_is_storer_in_txt_format(self): self.store01.save_matrix_to_file() self.assertTrue(self.file_exist(self.store01.sudoku_file,self.store01.sudoku_extension)) def test_a_sudoku_solution_is_storer_in_csv_format(self): self.store02.save_matrix_to_file() self.assertTrue(self.file_exist(self.store02.sudoku_file,self.store02.sudoku_extension)) def test_if_the_file_already_exist_an_alert_is_displayed(self): error = self.store02.save_matrix_to_file() self.assertEqual("File already exist", error) def file_exist(self, f_name, extension_file): """ This def is auxiliar to verify that the new file was created properly. """ try: f=open(f_name+"."+extension_file,'r') f.read() f.close() return True except IOError: return IOError
arg_parser = argparse.ArgumentParser( "find_by_identifier.py", description="This script allows one to identify the files in a given directory " "containing RDF documents that seem to be identical according to the " "identifier of the entity they describe.") arg_parser.add_argument("-i", "--input_dir", dest="i_dir", required=True, help="The directory where to look for duplicates.") arg_parser.add_argument("-o", "--output_file", dest="o_file", help="The file where to write the results.") arg_parser.add_argument("--recursive", dest="rec", default=False, action="store_true", help="The process will consider also the subdir recursively.") args = arg_parser.parse_args() id_doc = {} s = Storer(context_map={context_path: context_file_path}) all_files = [] if args.rec: for cur_dir, cur_subdir, cur_files in os.walk(args.i_dir): for cur_file in cur_files: if cur_file.endswith(".json"): all_files += [cur_dir + os.sep + cur_file] else: for cur_file in os.listdir(args.i_dir): if cur_file.endswith(".json"): all_files += [args.i_dir + os.sep + cur_file] for rdf_path in all_files: cur_g = s.load(rdf_path, tmp_dir=temp_dir_for_rdf_loading) try:
#!/usr/bin/env python3 from storer import Storer import sys s = Storer() if s.get_value() != 0: print('Initial value incorrect.') sys.exit(1) s.set_value(42) if s.get_value() != 42: print('Setting value failed.') sys.exit(1) try: s.set_value('not a number') print('Using wrong argument type did not fail.') sys.exit(1) except TypeError: pass
class DatasetHandler(object):
    """Builds and maintains DCAT/VoID metadata graphs for the OpenCitations
    Corpus, storing them to file and optionally uploading to a triplestore."""

    DCTERMS = Namespace("http://purl.org/dc/terms/")
    DCAT = Namespace("http://www.w3.org/ns/dcat#")
    VOID = Namespace("http://rdfs.org/ns/void#")
    MTT = Namespace("https://w3id.org/spar/mediatype/text/")
    DBR = Namespace("http://dbpedia.org/resource/")

    # Shorthand class attributes for the vocabulary terms used below.
    dataset = DCAT.Dataset
    datafile = DCAT.Distribution
    title = DCTERMS.title
    description = DCTERMS.description
    issued = DCTERMS.issued
    modified = DCTERMS.modified
    keyword = DCAT.keyword
    subject = DCAT.theme
    landing_page = DCAT.landingPage
    subset = VOID.subset
    sparql_endpoint = VOID.sparqlEndpoint
    distribution = DCAT.distribution
    license = DCTERMS.license
    download_url = DCAT.downloadURL
    media_type = DCAT.mediaType
    byte_size = DCAT.byte_size
    label = RDFS.label
    a = RDF.type
    turtle = MTT.turtle
    bibliographic_database = DBR.Bibliographic_database
    open_access = DBR.Open_access
    scholary_communication = DBR.Scholarly_communication
    citations = DBR.Citation

    def __init__(self, tp_url_real, context_path, context_file_path, base_iri,
                 base_dir, info_dir, dataset_home, tmp_dir, triplestore_url=None):
        """Record paths/IRIs and prepare the Storer used to persist metadata.

        :param tp_url_real: public SPARQL endpoint IRI advertised in metadata
        :param triplestore_url: update endpoint; None means store to file only
        """
        self.tp_url = triplestore_url
        self.base_iri = base_iri
        self.base_dir = base_dir
        self.info_dir = info_dir
        self.context_path = context_path
        self.dataset_home = URIRef(dataset_home)
        self.tmp_dir = tmp_dir
        self.tp_res = URIRef(tp_url_real)
        self.repok = Reporter(prefix="[DatasetHandler: INFO] ")
        self.reperr = Reporter(prefix="[DatasetHandler: ERROR] ")
        self.st = Storer(context_map={context_path: context_file_path},
                         repok=self.repok, reperr=self.reperr)
        # Preface query removes any stale modification date before the
        # updated metadata is stored.
        self.st.set_preface_query(
            u"DELETE { ?res <%s> ?date } WHERE { ?res a <%s> ; <%s> ?date }" %
            (str(DatasetHandler.modified), str(
                DatasetHandler.dataset), str(DatasetHandler.modified)))

    # /START Create Literal
    def create_label(self, g, res, string):
        return create_literal(g, res, RDFS.label, string)

    def create_publication_date(self, g, res, string):
        return create_literal(g, res, self.issued, string, XSD.dateTime)

    def update_modification_date(self, g, res, string):
        # Replace any existing dcterms:modified triple with the new timestamp.
        g.remove((res, self.modified, None))
        return create_literal(g, res, self.modified, string, XSD.dateTime)

    def create_title(self, g, res, string):
        return create_literal(g, res, self.title, string)

    def create_description(self, g, res, string):
        return create_literal(g, res, self.description, string)

    def create_keyword(self, g, res, string):
        return create_literal(g, res, self.keyword, string)

    def create_byte_size(self, g, res, string):
        return create_literal(g, res, self.byte_size, string, XSD.decimal)
    # /END Create Literal

    # /START Create Complex Attributes
    def has_subject(self, g, res, obj):
        g.add((res, self.subject, obj))

    def has_landing_page(self, g, res, obj):
        g.add((res, self.landing_page, obj))

    def has_subset(self, g, res, obj):
        g.add((res, self.subset, obj))

    def has_sparql_endpoint(self, g, res, obj):
        g.add((res, self.sparql_endpoint, obj))

    def has_distribution(self, g, res, obj):
        g.add((res, self.distribution, obj))

    def has_license(self, g, res, obj):
        g.add((res, self.license, obj))

    def has_download_url(self, g, res, obj):
        g.add((res, self.download_url, obj))

    def has_media_type(self, g, res, obj):
        g.add((res, self.media_type, obj))
    # /END Create Complex Attributes

    # /START Types
    def dataset_type(self, g, res):
        create_type(g, res, self.dataset)

    def distribution_type(self, g, res):
        create_type(g, res, self.datafile)
    # /END Types

    def update_dataset_info(self, graph_set):
        """Refresh the modification date of every dataset graph touched by
        graph_set, link each subset to the corpus-level dataset, and
        store/upload all resulting metadata graphs."""
        cur_time = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
        subgraphs_to_update = set()
        all_graphs = []
        for g in graph_set.graphs():
            cur_id = g.identifier
            if cur_id not in subgraphs_to_update:
                subgraphs_to_update.add(cur_id)
                cur_dataset_res = URIRef(cur_id)
                cur_dataset = self.get_dataset_graph(cur_dataset_res, cur_time)
                self.update_modification_date(cur_dataset, cur_dataset_res,
                                              cur_time)
                all_graphs += [cur_dataset]

        if subgraphs_to_update:
            # The corpus-level dataset also gets a fresh date plus one
            # void:subset link per touched subgraph.
            cur_occ_res = URIRef(self.base_iri)
            cur_occ = self.get_dataset_graph(cur_occ_res, cur_time)
            self.update_modification_date(cur_occ, cur_occ_res, cur_time)

            for subgraph_id in subgraphs_to_update:
                self.has_subset(cur_occ, cur_occ_res, URIRef(subgraph_id))
            all_graphs += [cur_occ]

        if all_graphs:  # Store everything and upload to triplestore
            if self.tp_url is None:
                self.st.store_all(
                    self.base_dir, self.base_iri, self.context_path,
                    self.tmp_dir, all_graphs, True)
            else:
                self.st.upload_and_store(
                    self.base_dir, self.tp_url, self.base_iri,
                    self.context_path, self.tmp_dir, all_graphs, True)

    def get_dataset_graph(self, res, cur_time):
        """Return the stored metadata graph for res, or build a fresh one
        with default OCC titles, keywords and subjects."""
        dataset_path = self.get_metadata_path_from_resource(res)
        if os.path.exists(dataset_path):
            # Reuse the metadata graph already stored on disk.
            return list(self.st.load(dataset_path,
                                     tmp_dir=self.tmp_dir).contexts())[0]
        else:
            dataset_label = "OCC"
            dataset_title = "The OpenCitations Corpus"
            dataset_description = "The OpenCitations Corpus is an open repository of scholarly " \
                                  "citation data made available under a Creative Commons public " \
                                  "domain dedication, which provides in RDF accurate citation " \
                                  "information (bibliographic references) harvested from the " \
                                  "scholarly literature (described using the SPAR Ontologies) " \
                                  "that others may freely build upon, enhance and reuse for any " \
                                  "purpose, without restriction under copyright or database law."
            if re.search("/../$", str(res)) is not None:
                # Sub-dataset IRI: ends with a two-letter short name (".../xx/").
                g = Graph(identifier=str(res))
                dataset_short_name = str(res)[-3:-1]
                dataset_name = GraphSet.labels[dataset_short_name]
                dataset_title += ": %s dataset" % dataset_name.title()
                dataset_description += " This sub-dataset contains all the '%s' resources." % \
                                       dataset_name
                dataset_label += " / %s" % dataset_short_name
                self.create_keyword(g, res, dataset_name)
            else:
                # Corpus-level dataset.
                g = Graph()
            self.has_landing_page(g, res, self.dataset_home)
            self.has_sparql_endpoint(g, res, self.tp_res)
            self.dataset_type(g, res)
            self.create_label(g, res, dataset_label)
            self.create_title(g, res, dataset_title)
            self.create_description(g, res, dataset_description)
            self.create_publication_date(g, res, cur_time)
            self.create_keyword(g, res, "OCC")
            self.create_keyword(g, res, "OpenCitations")
            self.create_keyword(g, res, "OpenCitations Corpus")
            self.create_keyword(g, res, "SPAR Ontologies")
            self.create_keyword(g, res, "bibliographic references")
            self.create_keyword(g, res, "citations")
            self.has_subject(g, res, self.bibliographic_database)
            self.has_subject(g, res, self.scholary_communication)
            self.has_subject(g, res, self.open_access)
            self.has_subject(g, res, self.citations)
            return g

    def get_metadata_path_from_resource(self, dataset_res):
        return self.get_metadata_path_from_iri(str(dataset_res))

    def get_metadata_path_from_iri(self, dataset_iri):
        # Map a dataset IRI under base_iri onto the filesystem under base_dir.
        return re.sub("^%s" % self.base_iri, self.base_dir,
                      dataset_iri) + "index.json"
class VeuszEngine(BotEngine):
    """Plotting backend built on an embedded Veusz instance.

    Per-page plot settings (axis names, log flags, limits, line counters)
    are kept in a Storer under "<page>/<key>" names; `plot()` reads them
    back when it styles each new xy curve.
    """

    internal_name: str = "[VeuszEngine]"
    # g: Embedded = None
    title: str = field(default="Notitle")
    pages_info: dict = field(default_factory=dict)
    _xy: Any = None  # flag for animation
    # showkey: bool = True
    keyBorderHide: bool = True
    keyFontSize: int = 14
    plot_line: bool = True
    #
    xname: str = "x"
    yname: str = "y"
    xlog: bool = False
    ylog: bool = False
    ymin: str = "Auto"
    ymax: str = "Auto"
    xmin: str = "Auto"
    xmax: str = "Auto"
    #
    transparency: int = 50

    def __post_init__(self):
        # NOTE(review): self.hidden and self.showkey are not declared here —
        # presumably inherited from BotEngine; confirm.
        self.storer = Storer(exit_dump=False)
        self.g = veusz.Embedded(name=self.title, hidden=self.hidden)
        self.g.EnableToolbar()
        self.init_pages()

    def _init(self, page_name=""):
        # creating initial values for plotting per page.
        self.storer.put(what="xname", name=page_name + "/xname")
        self.storer.put(what="yname", name=page_name + "/yname")
        self.storer.put(what=False, name=page_name + "/xlog")
        self.storer.put(what=False, name=page_name + "/ylog")
        self.storer.put(what="Auto", name=page_name + "/xmin")
        self.storer.put(what="Auto", name=page_name + "/xmax")
        self.storer.put(what="Auto", name=page_name + "/ymin")
        self.storer.put(what="Auto", name=page_name + "/ymax")

    def init_pages(self):
        """Seed the storer with defaults, then overlay pages_info (if given)
        or the instance-level axis settings for "page1"."""
        if self.pages_info:
            for page in self.pages_info:
                self._init(page_name=page)
                # Per-page overrides from the caller-supplied mapping.
                for prop in self.pages_info[page]:
                    self.storer.put(what=self.pages_info[page][prop],
                                    name=page + "/" + prop)
        else:
            self._init(page_name="page1")
            self.storer.put(what=self.xname, name="page1/xname")
            self.storer.put(what=self.yname, name="page1/yname")
            self.storer.put(what=self.xlog, name="page1/xlog")
            self.storer.put(what=self.ylog, name="page1/ylog")
            self.storer.put(what=self.xmin, name="page1/xmin")
            self.storer.put(what=self.xmax, name="page1/xmax")
            self.storer.put(what=self.ymax, name="page1/ymax")
            self.storer.put(what=self.ymin, name="page1/ymin")

    def get_page(self, name="page1"):
        """Fetch (or lazily create) a named Veusz page, its graph and key.

        Returns the pair (_num_lines, __num_lines): the per-page curve
        counters kept in the storer.
        """
        try:
            self.page = self.g.Root[name]
            _num_lines = self.storer.get(name=name + "/_num_lines")
            __num_lines = self.storer.get(
                name=name + "/__num_lines")  # if save_previous_state is applied
        except KeyError:
            # First use of this page: create it and reset the counters.
            self.page = self.g.Root.Add("page")
            self.page.Rename(name)
            __num_lines = 1
            _num_lines = 1
            self.storer.put(what=_num_lines, name=name + "/_num_lines")
            self.storer.put(what=__num_lines, name=name + "/__num_lines")
            self.page.width.val = '15cm'
            self.page.height.val = '10cm'
        try:
            self.graph = self.g.Root[name + '/graph1']
        except:
            self.graph = self.page.Add('graph')
        try:
            # key exist
            self.key = self.g.Root[name + "/graph1/key1"]
        except:
            if self.showkey:
                self.graph.Add('key')
                self.graph.key1.Border.hide.val = self.keyBorderHide
                self.graph.key1.Text.size.val = f"{str(self.keyFontSize)}pt"
        return _num_lines, __num_lines

    def plot(
        self,
        x: List,
        y: List,
        key_name_f: str = "",
        key_name: str = "",
        marker_size: str = "2.5pt",
        plot_line: bool = True,
        color_num: Union[str, int] = "auto",
        marker_type: Union[str, int] = "auto",
        line_type: Union[str, int] = "auto",
        save_previous_state: bool = False,
        animation: bool = False,
        error_style: str = None,
        internal_text: str = "",
        fill_marker: bool = False,
        page: str = "page1",
    ):
        """Add one xy curve to `page`.

        x/y may be 1-D (values) or 2-D (value, symmetric-error) sequences.
        In animation mode a single xy widget is reused and its data updated;
        otherwise each call adds a new xy with auto-cycled color/marker/line.
        """
        _num_lines, __num_lines = self.get_page(name=page)
        if animation:
            # Animation reuses one widget; style follows the current counter.
            color_num = _num_lines
            line_type = _num_lines
            save_previous_state = True
            xy = self._xy
        if save_previous_state:
            _num_lines -= 1
        if color_num == "auto":
            color_num = _num_lines
        if line_type == "auto":
            line_type = _num_lines
        # Dataset names are made unique per curve/page/state so Veusz does
        # not overwrite earlier data.
        if not animation:
            x_dataname = self.xname + str(_num_lines) + str(
                save_previous_state) + str(__num_lines) + str(page)
            y_dataname = self.yname + str(_num_lines) + str(
                save_previous_state) + str(__num_lines) + str(page)
        else:
            x_dataname = self.xname + str(_num_lines) + str(
                save_previous_state) + str(page)
            y_dataname = self.yname + str(_num_lines) + str(
                save_previous_state) + str(page)
        x_dataname += internal_text
        y_dataname += internal_text
        # 2-D input carries symmetric errors in the second column.
        if len(np.shape(x)) == 2:
            x_arr = np.array(x)
            x_data, x_data_err = x_arr[:, 0], x_arr[:, 1]
            self.g.SetData(x_dataname, x_data, symerr=x_data_err)
        else:
            x_arr = np.array(x)
            x_data = x_arr
            self.g.SetData(x_dataname, x_data)
        if len(np.shape(y)) == 2:
            y_arr = np.array(y)
            y_data, y_data_err = y_arr[:, 0], y_arr[:, 1]
            self.g.SetData(y_dataname, y_data, symerr=y_data_err)
        else:
            y_arr = np.array(y)
            y_data = y_arr
            self.g.SetData(y_dataname, y_data)
        # self.graph = self.g.Root[name + '/graph1']
        if animation:
            if not self._xy:
                self._xy = xy = self.g.Root[page + '/graph1'].Add('xy')
        else:
            xy = self.g.Root[page + '/graph1'].Add('xy')
        # nn.plotter_progress.g.Root.xyz_file.graph1.xy1.Clone(nn.plotter_progress.g.Root.xyz_file.graph1, 'xy7')
        xy.xData.val = x_dataname
        xy.yData.val = y_dataname
        if marker_type != "auto":
            xy.marker.val = get_marker_type(marker_type)
        else:
            xy.marker.val = get_marker_type(line_type)
        # Alternate filled/hollow markers by curve parity.
        if color_num % 2:
            xy.MarkerFill.color.val = get_line_color(color_num)
        else:
            xy.MarkerFill.color.val = 'white'
        if fill_marker:
            xy.MarkerFill.color.val = get_line_color(color_num)
        xy.MarkerLine.color.val = get_line_color(color_num)
        xy.markerSize.val = marker_size
        xy.PlotLine.width.val = '1pt'
        xy.PlotLine.style.val = get_line_type(line_type)
        xy.PlotLine.color.val = get_line_color(color_num)
        xy.PlotLine.hide.val = not plot_line
        if error_style:
            xy.errorStyle.val = error_style
            xy.FillBelow.color.val = get_line_color(color_num)
            xy.FillBelow.transparency.val = int(self.transparency)
            xy.FillAbove.color.val = get_line_color(color_num)
            xy.FillAbove.transparency.val = int(self.transparency)
            # ErrorBarLine/style
            # NOTE(review): get_line_type() is used here for a *color* value —
            # likely should be get_line_color(); confirm.
            xy.ErrorBarLine.color.val = get_line_type(line_type)
            xy.ErrorBarLine.style.val = get_line_type(line_type)
        else:
            xy.errorStyle.val = 'none'
            xy.ErrorBarLine.width.val = '1pt'
            xy.ErrorBarLine.color.val = get_line_color(color_num)
        if self.showkey and key_name_f:
            xy.key.val = self.name_converter(key_name_f)
        if self.showkey and key_name:
            xy.key.val = key_name
        # Axis settings always come from the storer's per-page values.
        x_axis = self.graph.x
        y_axis = self.graph.y
        x_axis.label.val = self.storer.get(page + "/xname")  # self.xname
        y_axis.label.val = self.storer.get(page + "/yname")  # self.yname
        x_axis.log.val = self.storer.get(page + "/xlog")  # self.xlog
        y_axis.log.val = self.storer.get(page + "/ylog")  # self.ylog
        x_axis.min.val = self.storer.get(page + "/xmin")  # self.xmin
        x_axis.max.val = self.storer.get(page + "/xmax")  # self.xmax
        y_axis.min.val = self.storer.get(page + "/ymin")  # self.ymin
        y_axis.max.val = self.storer.get(page + "/ymax")  # self.ymax
        _num_lines += 1
        __num_lines += 1
        self.storer.put(_num_lines, name=page + "/_num_lines")
        self.storer.put(__num_lines, name=page + "/__num_lines")

    def export(self,
               filename: str = "output.pdf",
               extension: str = "pdf",
               color: bool = True,
               page: int = 0,
               dpi: int = 100,
               antialias: bool = True,
               quality: int = 85,
               backcolor: str = '#ffffff00',
               pdfdpi: int = 150,
               svgtextastext: bool = False):
        """Render the document to `filename` via Veusz's Export, or print
        usage help when filename/extension is empty."""
        if not filename or not extension:
            print(
                f"{self.internal_name} You have to specify filename and extension!"
            )
            print(
                f"{self.internal_name} For example: filename='my_amazing_figure', extension='pdf'"
            )
            print(
                f"{self.internal_name} color=True, extension='pdf', quality='85', pdfdpi='150'"
            )
            print(
                f"{self.internal_name} Available extensions: [pdf]/[eps]/[ps]/[svg]/[jpg]/[jpeg]/[bmp]/[png]"
            )
        else:
            self.g.Export(filename,
                          color=color,
                          page=page,
                          dpi=dpi,
                          antialias=antialias,
                          quality=quality,
                          backcolor=backcolor,
                          pdfdpi=pdfdpi,
                          svgtextastext=svgtextastext)

    def save(self, filename=None):
        """Save the Veusz document as <filename>_<yname>_<xname>.vsz,
        appending a counter when the file already exists."""
        if not filename:
            print(
                f"{self.internal_name} You have to specify filename! [Labels from Y and X will be added automatically]"
            )
        else:
            # NOTE(review): str.find() returns -1 (truthy) when absent, so
            # every condition after the first is effectively always true;
            # and the .replace() chain result is discarded (strings are
            # immutable), so `filename` is never actually sanitized.
            if filename.find(".") != -1 or filename.find(":") or filename.find(
                    "\\") or filename.find("*") or filename.find(
                        "/") or filename.find("\\\\"):
                print(
                    f"{self.internal_name} I found forbidden symbols [.]/[:]..."
                )
                filename.replace(".", "").replace(":", "_").replace(
                    "\\\\", "").replace("*", "").replace("/", "_").replace("\\", "")
            # latex reduction
            xname = self.xname.replace("\\italic", "").replace(
                "{", "").replace("}", "").replace("_", "").replace("^", "").replace(
                    "\\\\", "").replace("\\", "").replace("/", "_").replace("*", "")
            yname = self.yname.replace("\\italic", "").replace(
                "{", "").replace("}", "").replace("_", "").replace("^", "").replace(
                    "\\\\", "").replace("\\", "").replace("/", "_").replace("*", "")
            # space reduction
            xname = xname.replace(" ", "")
            yname = yname.replace(" ", "")
            name4saving = filename + "_" + yname + "_" + xname
            if not os.path.exists(name4saving + ".vsz"):
                self.g.Save(name4saving + ".vsz")
            else:
                # Append the first free integer suffix to avoid clobbering.
                print(f"{self.internal_name} The file exists!")
                i = 0
                while os.path.exists(name4saving + str(i) + ".vsz"):
                    i += 1
                name4saving += str(i) + ".vsz"
                self.g.Save(name4saving)
            print(f"{self.internal_name} Saved! filename: {name4saving}")
class DatasetHandler(object):
    """Build and persist the DCAT/VoID metadata describing the OpenCitations
    Corpus dataset and its sub-datasets.

    The class attributes below are shortcuts for the RDF vocabulary terms
    used when composing the metadata graphs.
    """

    DCTERMS = Namespace("http://purl.org/dc/terms/")
    DCAT = Namespace("http://www.w3.org/ns/dcat#")
    VOID = Namespace("http://rdfs.org/ns/void#")
    MTT = Namespace("https://w3id.org/spar/mediatype/text/")
    DBR = Namespace("http://dbpedia.org/resource/")

    dataset = DCAT.Dataset
    datafile = DCAT.Distribution
    title = DCTERMS.title
    description = DCTERMS.description
    issued = DCTERMS.issued
    modified = DCTERMS.modified
    keyword = DCAT.keyword
    subject = DCAT.theme
    landing_page = DCAT.landingPage
    subset = VOID.subset
    sparql_endpoint = VOID.sparqlEndpoint
    distribution = DCAT.distribution
    license = DCTERMS.license
    download_url = DCAT.downloadURL
    media_type = DCAT.mediaType
    # BUGFIX: the DCAT vocabulary property is 'byteSize' (camelCase);
    # 'DCAT.byte_size' minted the non-existent IRI
    # <http://www.w3.org/ns/dcat#byte_size>.
    byte_size = DCAT.byteSize
    label = RDFS.label
    a = RDF.type
    turtle = MTT.turtle
    bibliographic_database = DBR.Bibliographic_database
    open_access = DBR.Open_access
    # NOTE: the attribute name keeps the historical misspelling ('scholary')
    # because external callers may reference it; the IRI itself is correct.
    scholary_communication = DBR.Scholarly_communication
    citations = DBR.Citation

    def __init__(self, tp_url_real, context_path, context_file_path, base_iri,
                 base_dir, info_dir, dataset_home, tmp_dir,
                 triplestore_url=None):
        """Configure paths/IRIs and prepare the Storer used for persistence.

        When 'triplestore_url' is None the metadata are only stored on the
        filesystem; otherwise they are also uploaded to that endpoint.
        """
        self.tp_url = triplestore_url
        self.base_iri = base_iri
        self.base_dir = base_dir
        self.info_dir = info_dir
        self.context_path = context_path
        self.dataset_home = URIRef(dataset_home)
        self.tmp_dir = tmp_dir
        self.tp_res = URIRef(tp_url_real)
        self.repok = Reporter(prefix="[DatasetHandler: INFO] ")
        self.reperr = Reporter(prefix="[DatasetHandler: ERROR] ")
        self.st = Storer(context_map={context_path: context_file_path},
                         repok=self.repok, reperr=self.reperr)
        # Before storing, drop any stale modification date of each dataset so
        # that update_modification_date() leaves exactly one value per dataset.
        self.st.set_preface_query(
            u"DELETE { ?res <%s> ?date } WHERE { ?res a <%s> ; <%s> ?date }" %
            (str(DatasetHandler.modified), str(DatasetHandler.dataset),
             str(DatasetHandler.modified)))

    # /START Create Literal
    def create_label(self, g, res, string):
        """Attach an rdfs:label literal to 'res'."""
        return create_literal(g, res, RDFS.label, string)

    def create_publication_date(self, g, res, string):
        """Attach a dcterms:issued literal (xsd:dateTime) to 'res'."""
        return create_literal(g, res, self.issued, string, XSD.dateTime)

    def update_modification_date(self, g, res, string):
        """Replace any dcterms:modified value of 'res' with 'string'."""
        g.remove((res, self.modified, None))
        return create_literal(g, res, self.modified, string, XSD.dateTime)

    def create_title(self, g, res, string):
        """Attach a dcterms:title literal to 'res'."""
        return create_literal(g, res, self.title, string)

    def create_description(self, g, res, string):
        """Attach a dcterms:description literal to 'res'."""
        return create_literal(g, res, self.description, string)

    def create_keyword(self, g, res, string):
        """Attach a dcat:keyword literal to 'res'."""
        return create_literal(g, res, self.keyword, string)

    def create_byte_size(self, g, res, string):
        """Attach a dcat:byteSize literal (xsd:decimal) to 'res'."""
        return create_literal(g, res, self.byte_size, string, XSD.decimal)
    # /END Create Literal

    # /START Create Complex Attributes
    def has_subject(self, g, res, obj):
        g.add((res, self.subject, obj))

    def has_landing_page(self, g, res, obj):
        g.add((res, self.landing_page, obj))

    def has_subset(self, g, res, obj):
        g.add((res, self.subset, obj))

    def has_sparql_endpoint(self, g, res, obj):
        g.add((res, self.sparql_endpoint, obj))

    def has_distribution(self, g, res, obj):
        g.add((res, self.distribution, obj))

    def has_license(self, g, res, obj):
        g.add((res, self.license, obj))

    def has_download_url(self, g, res, obj):
        g.add((res, self.download_url, obj))

    def has_media_type(self, g, res, obj):
        g.add((res, self.media_type, obj))
    # /END Create Complex Attributes

    # /START Types
    def dataset_type(self, g, res):
        create_type(g, res, self.dataset)

    def distribution_type(self, g, res):
        create_type(g, res, self.datafile)
    # /END Types

    def update_dataset_info(self, graph_set):
        """Refresh the modification dates of every (sub-)dataset touched by
        'graph_set', link the sub-datasets to the main OCC dataset, and store
        (and, when configured, upload) the resulting metadata graphs.
        """
        # NOTE(review): local, naive timestamp — presumably server time is
        # intended here; confirm whether UTC would be more appropriate.
        cur_time = datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
        subgraphs_to_update = set()
        all_graphs = []

        for g in graph_set.graphs():
            cur_id = g.identifier
            if cur_id not in subgraphs_to_update:
                subgraphs_to_update.add(cur_id)
                cur_dataset_res = URIRef(cur_id)
                cur_dataset = self.get_dataset_graph(cur_dataset_res, cur_time)
                self.update_modification_date(cur_dataset, cur_dataset_res,
                                              cur_time)
                all_graphs += [cur_dataset]

        if subgraphs_to_update:
            # The main dataset (identified by base_iri) subsumes every
            # updated sub-dataset via void:subset.
            cur_occ_res = URIRef(self.base_iri)
            cur_occ = self.get_dataset_graph(cur_occ_res, cur_time)
            self.update_modification_date(cur_occ, cur_occ_res, cur_time)

            for subgraph_id in subgraphs_to_update:
                self.has_subset(cur_occ, cur_occ_res, URIRef(subgraph_id))
            all_graphs += [cur_occ]

        if all_graphs:  # Store everything and upload to triplestore
            if self.tp_url is None:
                self.st.store_all(
                    self.base_dir, self.base_iri, self.context_path,
                    self.tmp_dir, all_graphs, True)
            else:
                self.st.upload_and_store(
                    self.base_dir, self.tp_url, self.base_iri,
                    self.context_path, self.tmp_dir, all_graphs, True)

    def get_dataset_graph(self, res, cur_time):
        """Return the metadata graph for dataset 'res', loading it from disk
        when it already exists, otherwise creating it from scratch."""
        dataset_path = self.get_metadata_path_from_resource(res)
        if os.path.exists(dataset_path):
            return list(self.st.load(dataset_path,
                                     tmp_dir=self.tmp_dir).contexts())[0]
        else:
            dataset_label = "OCC"
            dataset_title = "The OpenCitations Corpus"
            dataset_description = "The OpenCitations Corpus is an open repository of scholarly " \
                                  "citation data made available under a Creative Commons public " \
                                  "domain dedication, which provides in RDF accurate citation " \
                                  "information (bibliographic references) harvested from the " \
                                  "scholarly literature (described using the SPAR Ontologies) " \
                                  "that others may freely build upon, enhance and reuse for any " \
                                  "purpose, without restriction under copyright or database law."
            # Sub-dataset IRIs end with "/<two-letter-code>/": specialise the
            # metadata with the sub-dataset's name taken from GraphSet.labels.
            if re.search("/../$", str(res)) is not None:
                g = Graph(identifier=str(res))
                dataset_short_name = str(res)[-3:-1]
                dataset_name = GraphSet.labels[dataset_short_name]
                dataset_title += ": %s dataset" % dataset_name.title()
                dataset_description += " This sub-dataset contains all the '%s' resources." % \
                                       dataset_name
                dataset_label += " / %s" % dataset_short_name
                self.create_keyword(g, res, dataset_name)
            else:
                # NOTE(review): the main dataset graph is created without an
                # explicit identifier, unlike the sub-dataset branch — looks
                # intentional, but confirm.
                g = Graph()
                self.has_landing_page(g, res, self.dataset_home)
                self.has_sparql_endpoint(g, res, self.tp_res)

            self.dataset_type(g, res)
            self.create_label(g, res, dataset_label)
            self.create_title(g, res, dataset_title)
            self.create_description(g, res, dataset_description)
            self.create_publication_date(g, res, cur_time)
            self.create_keyword(g, res, "OCC")
            self.create_keyword(g, res, "OpenCitations")
            self.create_keyword(g, res, "OpenCitations Corpus")
            self.create_keyword(g, res, "SPAR Ontologies")
            self.create_keyword(g, res, "bibliographic references")
            self.create_keyword(g, res, "citations")
            self.has_subject(g, res, self.bibliographic_database)
            self.has_subject(g, res, self.scholary_communication)
            self.has_subject(g, res, self.open_access)
            self.has_subject(g, res, self.citations)

            return g

    def get_metadata_path_from_resource(self, dataset_res):
        """Return the filesystem path of the metadata file for 'dataset_res'."""
        return self.get_metadata_path_from_iri(str(dataset_res))

    def get_metadata_path_from_iri(self, dataset_iri):
        """Map a dataset IRI onto its 'index.json' path under base_dir."""
        return re.sub("^%s" % self.base_iri, self.base_dir,
                      dataset_iri) + "index.json"
help="The directory where to look for duplicates.") arg_parser.add_argument("-o", "--output_file", dest="o_file", help="The file where to write the results.") arg_parser.add_argument( "--recursive", dest="rec", default=False, action="store_true", help="The process will consider also the subdir recursively.") args = arg_parser.parse_args() id_doc = {} s = Storer(context_map={context_path: context_file_path}) all_files = [] if args.rec: for cur_dir, cur_subdir, cur_files in os.walk(args.i_dir): for cur_file in cur_files: if cur_file.endswith(".json"): all_files += [cur_dir + os.sep + cur_file] else: for cur_file in os.listdir(args.i_dir): if cur_file.endswith(".json"): all_files += [args.i_dir + os.sep + cur_file] for rdf_path in all_files: cur_g = s.load(rdf_path, tmp_dir=temp_dir_for_rdf_loading) try:
def test_separation(self):
    """ Test separation procedure """
    # With separations=1 every put() should end up in its own backup.
    writer = Storer(dump_path=PATH_DUMPS,
                    dump_name=DUMP_NAME,
                    verbose=VERBOSE,
                    compressed=False,
                    separations=1,
                    _test=True)
    for value, label in enumerate(("one", "two", "three", "four"), start=1):
        writer.put(value, name=label)
    writer.dump()

    # Re-open the dump and count the recorded backups.
    reader = Storer(dump_path=PATH_DUMPS,
                    dump_name=DUMP_NAME,
                    verbose=VERBOSE,
                    compressed=False,
                    _test=True)
    length = len(reader.backup_list)
    assert length == 4, f"Got something different: {length}"
    writer._cleanup()
# Standard library
import logging  # BUGFIX: logging.basicConfig()/getLogger() are used below
                # but 'logging' was never imported in this header.
import os
from datetime import datetime, timedelta

# Third-party
import pytz
import telegram
from telegram.ext import Updater, CommandHandler, CallbackQueryHandler, Job

# Local
import init_bot
from storer import Storer
from user_info import UserInfo

# Path of the shelve database used to persist bot state between restarts.
STORED_FILE = os.getenv('UNI_STORED_FILE', 'unison_bot_shelve.db')

# Conversation states for the bot's state machine.
MENU, AWAIT_INPUT_GAME, AWAIT_MEETING_ANSWER = range(3)

state = dict()                 # per-chat conversation state
users = {}                     # in-memory user registry
users_store = Storer(STORED_FILE)  # persistent backing store for 'users'
forum_subscribers = dict()
meeting_subscribers = []
komsostav = []
posts_from_forum = []
last_check_new_posts = 0

UPDATE_FORUM_POSTS_TIMEOUT_SEC = 10. * 60       # poll forum every 10 minutes
DEL_FORUM_POSTS_TIMEOUT_SEC = 24 * 60. * 60     # drop cached posts after 24 h

logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
def test_loading_separation_pbz2(self):
    """ Test separation procedure """
    # Write four values through a compressed, separation-enabled storer.
    writer = Storer(dump_path=PATH_DUMPS,
                    dump_name=DUMP_NAME,
                    verbose=VERBOSE,
                    compressed=True,
                    separations=1,
                    _test=True)
    expected = {"one": 1, "two": 2, "three": 3, "four": 4}
    for label, value in expected.items():
        writer.put(value, name=label)
    writer.dump()

    # A fresh storer must read back every value from the compressed dump.
    reader = Storer(dump_path=PATH_DUMPS,
                    dump_name=DUMP_NAME,
                    verbose=VERBOSE,
                    compressed=True,
                    _test=True)
    one = reader.get(name="one")
    assert 1 == one, f"Got something different: {one}"
    two = reader.get(name="two")
    assert 2 == two, f"Got something different: {two}"
    three = reader.get(name="three")
    assert 3 == three, f"Got something different: {three}"
    four = reader.get(name="four")
    assert 4 == four, f"Got something different: {four}"
    writer._cleanup()
def test_long_loop_loading(self):
    """ Get method in loop: Checking for not overloading internal data dict """
    store = Storer(dump_path=PATH_DUMPS,
                   dump_name=DUMP_NAME,
                   verbose=VERBOSE,
                   compressed=False,
                   separations=10000,
                   _test=True)
    cycles = 1000
    # get() before put() exercises the miss path on every key.
    for idx in range(cycles):
        if not store.get(name=str(idx)):
            store.put(what=idx, name=str(idx))
    store.dump()

    # Reload from disk and verify every key round-tripped.
    reloaded = Storer(dump_path=PATH_DUMPS,
                      dump_name=DUMP_NAME,
                      verbose=VERBOSE,
                      compressed=False,
                      separations=10000,
                      _test=True)
    for idx in range(cycles):
        value = reloaded.get(name=str(idx))
        assert value == idx, f"Got something different: {value} != {idx}"
    store._cleanup()
class ResourceFinder(object):
    """Look up existing corpus resources (by DOI, PMID, PMCID, ORCID, URL,
    ISSN, ISBN, ...) either in a SPARQL triplestore or in the local graph
    set / provenance files on disk.

    SECURITY NOTE(review): all lookup methods interpolate the identifier
    string directly into SPARQL text — identifiers coming from untrusted
    sources could inject query fragments; confirm inputs are sanitised
    upstream before relying on these queries.
    """

    def __init__(self, g_set=None, ts_url=None, base_dir=None, base_iri=None,
                 tmp_dir=None, context_map=None):
        """Prepare the local graph, the Storer and (optionally) the
        triplestore connection used for lookups.

        BUGFIX: 'context_map={}' was a shared mutable default argument;
        it is now created per call.
        """
        if context_map is None:
            context_map = {}
        self.g = Graph()
        self.base_dir = base_dir
        self.base_iri = base_iri
        self.storer = Storer(context_map=context_map)
        self.tmp_dir = tmp_dir
        self.name = "SPACIN " + self.__class__.__name__
        self.loaded = set()  # provenance files already merged into self.g
        if g_set is not None:
            self.update_graph_set(g_set)
        if ts_url is None:
            self.ts = None
        else:
            self.ts = ConjunctiveGraph('SPARQLUpdateStore')
            self.ts.open((ts_url, ts_url))

    def add_prov_triples_in_filesystem(self, res_iri, prov_entity_type=None):
        """Load the provenance JSON files related to 'res_iri' into self.g.

        When 'prov_entity_type' is given, only files whose name starts with
        it are considered. Files already loaded once are skipped.
        """
        if self.base_dir is not None and self.base_iri is not None:
            cur_file_path = find_paths(res_iri, self.base_dir, self.base_iri,
                                       dir_split_number, items_per_file)[1]
            if cur_file_path.endswith("index.json"):
                cur_path = cur_file_path.replace("index.json", "") + "prov"
            else:
                cur_path = cur_file_path[:-5] + os.sep + "prov"

            file_list = []
            if os.path.isdir(cur_path):
                for cur_dir, cur_subdir, cur_files in os.walk(cur_path):
                    for cur_file in cur_files:
                        if cur_file.endswith(".json") and \
                                (prov_entity_type is None or
                                 cur_file.startswith(prov_entity_type)):
                            file_list += [cur_dir + os.sep + cur_file]

            for file_path in file_list:
                if file_path not in self.loaded:
                    self.loaded.add(file_path)
                    cur_g = self.storer.load(file_path, tmp_dir=self.tmp_dir)
                    self.add_triples_in_graph(cur_g)

    def add_triples_in_graph(self, g):
        """Merge every triple of 'g' (if any) into the local graph."""
        if g is not None:
            for s, p, o in g.triples((None, None, None)):
                self.g.add((s, p, o))

    def update_graph_set(self, g_set):
        """Merge every graph of 'g_set' into the local graph."""
        for g in g_set.graphs():
            self.add_triples_in_graph(g)

    def retrieve(self, id_dict):
        """Return the first resource matching any of the ids in 'id_dict'
        ({id_type: [id_string, ...]}), or None."""
        for id_type in id_dict:
            for id_string in id_dict[id_type]:
                res = self.__id_with_type(id_string, id_type)
                if res is not None:
                    return res

    def retrieve_provenance_agent_from_name(self, string):
        """Return the provenance agent having the given name, or None."""
        query = """
            SELECT DISTINCT ?pa WHERE {
              ?pa a <%s> ;
                  <%s> "%s"
            } LIMIT 1
        """ % (ProvEntity.prov_agent, GraphEntity.name, string)
        return self.__query(query)

    def retrieve_from_orcid(self, string):
        return self.__id_with_type(string, GraphEntity.orcid)

    # Citing-entity lookups: the extra pattern restricts matches to
    # resources that cite something.
    def retrieve_citing_from_doi(self, string):
        return self.__id_with_type(
            string.lower(), GraphEntity.doi,
            "?res <%s> ?cited" % GraphEntity.cites)

    def retrieve_citing_from_pmid(self, string):
        return self.__id_with_type(
            string, GraphEntity.pmid,
            "?res <%s> ?cited" % GraphEntity.cites)

    def retrieve_citing_from_pmcid(self, string):
        return self.__id_with_type(
            string, GraphEntity.pmcid,
            "?res <%s> ?cited" % GraphEntity.cites)

    def retrieve_citing_from_url(self, string):
        return self.__id_with_type(
            string.lower(), GraphEntity.url,
            "?res <%s> ?cited" % GraphEntity.cites)

    # Plain identifier lookups (DOIs and URLs are matched lowercase).
    def retrieve_from_doi(self, string):
        return self.__id_with_type(string.lower(), GraphEntity.doi)

    def retrieve_from_pmid(self, string):
        return self.__id_with_type(string, GraphEntity.pmid)

    def retrieve_from_pmcid(self, string):
        return self.__id_with_type(string, GraphEntity.pmcid)

    def retrieve_from_url(self, string):
        return self.__id_with_type(string.lower(), GraphEntity.url)

    def retrieve_from_issn(self, string):
        return self.__id_with_type(string, GraphEntity.issn)

    def retrieve_from_isbn(self, string):
        return self.__id_with_type(string, GraphEntity.isbn)

    def retrieve_issue_from_journal(self, id_dict, issue_id, volume_id):
        """Return the issue 'issue_id' of the journal identified by 'id_dict',
        optionally scoped to volume 'volume_id'."""
        if volume_id is None:
            return self.__retrieve_from_journal(
                id_dict, GraphEntity.journal_issue, issue_id)
        else:
            retrieved_volume = self.retrieve_volume_from_journal(
                id_dict, volume_id)
            if retrieved_volume is not None:
                query = """
                    SELECT DISTINCT ?br WHERE {
                      ?br a <%s> ;
                          <%s> <%s> ;
                          <%s> "%s"
                    } LIMIT 1
                """ % (GraphEntity.journal_issue, GraphEntity.part_of,
                       str(retrieved_volume),
                       GraphEntity.has_sequence_identifier, issue_id)
                return self.__query(query)

    def retrieve_volume_from_journal(self, id_dict, volume_id):
        return self.__retrieve_from_journal(
            id_dict, GraphEntity.journal_volume, volume_id)

    def retrieve_br_url(self, res, string):
        return self.__retrieve_res_id_by_type(res, string.lower(),
                                              GraphEntity.url)

    def retrieve_br_doi(self, res, string):
        return self.__retrieve_res_id_by_type(res, string.lower(),
                                              GraphEntity.doi)

    def retrieve_br_pmid(self, res, string):
        return self.__retrieve_res_id_by_type(res, string, GraphEntity.pmid)

    def retrieve_br_pmcid(self, res, string):
        return self.__retrieve_res_id_by_type(res, string, GraphEntity.pmcid)

    def retrieve_last_snapshot(self, prov_subj):
        """Return the snapshot of 'prov_subj' that has not been invalidated
        yet (i.e. the most recent one), or None."""
        query = """
            SELECT DISTINCT ?se WHERE {
              ?se <%s> <%s> .
              FILTER NOT EXISTS { ?se <%s> ?ca }
            } LIMIT 1
        """ % (ProvEntity.specialization_of, str(prov_subj),
               ProvEntity.was_invalidated_by)
        return self.__query(query)

    def __retrieve_res_id_by_type(self, res, id_string, id_type):
        """Return the identifier entity of 'res' having the given scheme and
        literal value, or None."""
        if id_string is not None:
            query = """
                SELECT DISTINCT ?id WHERE {
                  <%s> <%s> ?id .
                  ?id <%s> <%s> ;
                      <%s> "%s"
                }""" % (res, GraphEntity.has_identifier,
                        GraphEntity.uses_identifier_scheme, id_type,
                        GraphEntity.has_literal_value, id_string)
            return self.__query(query)

    def __retrieve_from_journal(self, id_dict, part_type, part_seq_id):
        """Return the part (volume/issue) with sequence id 'part_seq_id' of
        the journal identified by any id in 'id_dict', or None.

        BUGFIX: the original returned unconditionally on the very first id,
        so further identifiers were never tried when the first lookup failed;
        now every id is tried (consistent with retrieve()).
        """
        for id_type in id_dict:
            for id_string in id_dict[id_type]:
                query = """
                    SELECT DISTINCT ?res WHERE {
                      ?j <%s> ?id .
                      ?id <%s> <%s> ;
                          <%s> "%s" .
                      ?res a <%s> ;
                           <%s>+ ?j ;
                           <%s> "%s"
                    }""" % (GraphEntity.has_identifier,
                            GraphEntity.uses_identifier_scheme, id_type,
                            GraphEntity.has_literal_value, id_string,
                            part_type, GraphEntity.part_of,
                            GraphEntity.has_sequence_identifier, part_seq_id)
                res = self.__query(query)
                if res is not None:
                    return res

    def __id_with_type(self, id_string, id_type, extras=""):
        """Return a resource identified by ('id_type', 'id_string'); 'extras'
        may add further SPARQL patterns constraining ?res. None if absent."""
        query = """
            SELECT DISTINCT ?res WHERE {
              ?res <%s> ?id .
              ?id <%s> <%s> ;
                  <%s> "%s" .
              %s
            }""" % (GraphEntity.has_identifier,
                    GraphEntity.uses_identifier_scheme, id_type,
                    GraphEntity.has_literal_value, id_string, extras)
        return self.__query(query)

    def __query(self, query):
        """Run 'query' against the triplestore first, then the local graph;
        return the first binding found, or None."""
        if self.ts is not None:
            result = self.ts.query(query)
            for res, in result:
                return res
        # If nothing has been returned, check if there is something
        # in the current graph set
        result = self.g.query(query)
        for res, in result:
            return res
print "\n\nProcess file '%s'\n" % cur_file_path json_object = json.load(fp) crp = CrossrefProcessor(base_iri, context_path, info_dir, json_object, ResourceFinder(ts_url=triplestore_url), ORCIDFinder(orcid_conf_path)) result = crp.process() if result is not None: prov = ProvSet(result, base_iri, context_path, info_dir, ResourceFinder(base_dir=base_dir, base_iri=base_iri, tmp_dir=temp_dir_for_rdf_loading, context_map= {context_path: context_file_path})) prov.generate_provenance() res_storer = Storer(result, context_map={context_path: context_file_path}, dir_split=dir_split_number, n_file_item=items_per_file) res_storer.upload_and_store( base_dir, triplestore_url, base_iri, context_path, temp_dir_for_rdf_loading) prov_storer = Storer(prov, context_map={context_path: context_file_path}, dir_split=dir_split_number, n_file_item=items_per_file) prov_storer.store_all( base_dir, base_iri, context_path, temp_dir_for_rdf_loading) dset_handler = DatasetHandler(triplestore_url_real, context_path,
if result is not None: prov = ProvSet( result, base_iri, context_path, info_dir, ResourceFinder( base_dir=base_dir, base_iri=base_iri, tmp_dir=temp_dir_for_rdf_loading, context_map={ context_path: context_file_path })) prov.generate_provenance() res_storer = Storer(result, context_map={ context_path: context_file_path }, dir_split=dir_split_number, n_file_item=items_per_file) res_storer.upload_and_store( base_dir, triplestore_url, base_iri, context_path, temp_dir_for_rdf_loading) prov_storer = Storer( prov, context_map={ context_path: context_file_path }, dir_split=dir_split_number, n_file_item=items_per_file) prov_storer.store_all(
def __init__(self):
    """Wire up the crawler's collaborators."""
    # Parser walks Quora profile pages; Storer persists parsed users.
    self.parser = Parser("http://www.quora.com/profile/")
    self.storer = Storer(db="Quora", collection="user")
from telegram.ext import CommandHandler, Filters, MessageHandler, Updater

# --- Deployment configuration (filled in before running the bot) ---
TOKEN = ''  # TG bot token
VK_TOKEN = ''  # VK access token
PUBLIC_ID = 0  # vk.com/wall<PUBLIC_ID>_...
INTERVAL = 60  # Seconds
IGNORE_TAGS = []  # ['#IGNORE', '#HASHTAGS']

# Enable logging
logging.basicConfig(format='[%(asctime)s][%(levelname)s] - %(message)s',
                    level=logging.INFO)
logger = logging.getLogger(__name__)

# Database
storer = Storer('bot.db')


def vk(method, params):
    # Call the VK API method 'method' with 'params' and return the decoded
    # JSON response as a dict.
    # NOTE(review): assumes 'requests', 'json' and 'logging' are imported
    # earlier in the file — confirm.
    params['access_token'] = VK_TOKEN
    params['v'] = 5.74
    q = requests.post('https://api.vk.com/method/' + method, data=params)
    return json.loads(q.text)


def start(bot, update):
    # /start handler: if no channel id is stored yet, prompt the admin to
    # link the bot to a channel by forwarding a message from it.
    if storer.restore('cid') is None:
        update.message.reply_text(
            'To get started, add me to the channel\'s administrators, '
            'then forward any message from the channel to this dialog')
class ResourceFinder(object):
    """Resolve corpus resources from external identifiers (DOI, PMID, PMCID,
    ORCID, URL, ISSN, ISBN, ...) by querying a SPARQL triplestore and/or the
    local graph set and provenance files.

    SECURITY NOTE(review): identifier strings are interpolated verbatim into
    SPARQL text; verify that they are sanitised upstream, since untrusted
    input could inject query fragments.
    """

    def __init__(self, g_set=None, ts_url=None, base_dir=None, base_iri=None,
                 tmp_dir=None, context_map=None):
        """Initialise the local graph, the Storer and, when 'ts_url' is
        given, the triplestore connection.

        BUGFIX: 'context_map={}' was a shared mutable default argument;
        a fresh dict is now created per call.
        """
        if context_map is None:
            context_map = {}
        self.g = Graph()
        self.base_dir = base_dir
        self.base_iri = base_iri
        self.storer = Storer(context_map=context_map)
        self.tmp_dir = tmp_dir
        self.name = "SPACIN " + self.__class__.__name__
        self.loaded = set()  # provenance file paths already merged in self.g
        if g_set is not None:
            self.update_graph_set(g_set)
        if ts_url is None:
            self.ts = None
        else:
            self.ts = ConjunctiveGraph('SPARQLUpdateStore')
            self.ts.open((ts_url, ts_url))

    def add_prov_triples_in_filesystem(self, res_iri, prov_entity_type=None):
        """Load into self.g the provenance JSON files related to 'res_iri',
        optionally filtered by 'prov_entity_type' prefix; already-loaded
        files are skipped."""
        if self.base_dir is not None and self.base_iri is not None:
            cur_file_path = find_paths(res_iri, self.base_dir, self.base_iri,
                                       dir_split_number, items_per_file)[1]
            if cur_file_path.endswith("index.json"):
                cur_path = cur_file_path.replace("index.json", "") + "prov"
            else:
                cur_path = cur_file_path[:-5] + os.sep + "prov"

            file_list = []
            if os.path.isdir(cur_path):
                for cur_dir, cur_subdir, cur_files in os.walk(cur_path):
                    for cur_file in cur_files:
                        if cur_file.endswith(".json") and \
                                (prov_entity_type is None or
                                 cur_file.startswith(prov_entity_type)):
                            file_list += [cur_dir + os.sep + cur_file]

            for file_path in file_list:
                if file_path not in self.loaded:
                    self.loaded.add(file_path)
                    cur_g = self.storer.load(file_path, tmp_dir=self.tmp_dir)
                    self.add_triples_in_graph(cur_g)

    def add_triples_in_graph(self, g):
        """Merge every triple of 'g' (if any) into self.g."""
        if g is not None:
            for s, p, o in g.triples((None, None, None)):
                self.g.add((s, p, o))

    def update_graph_set(self, g_set):
        """Merge every graph of 'g_set' into self.g."""
        for g in g_set.graphs():
            self.add_triples_in_graph(g)

    def retrieve(self, id_dict):
        """Return the first resource matching any id in 'id_dict'
        ({id_type: [id_string, ...]}), or None."""
        for id_type in id_dict:
            for id_string in id_dict[id_type]:
                res = self.__id_with_type(id_string, id_type)
                if res is not None:
                    return res

    def retrieve_provenance_agent_from_name(self, string):
        """Return the provenance agent with the given name, or None."""
        query = """
            SELECT DISTINCT ?pa WHERE {
              ?pa a <%s> ;
                  <%s> "%s"
            } LIMIT 1
        """ % (ProvEntity.prov_agent, GraphEntity.name, string)
        return self.__query(query)

    def retrieve_from_orcid(self, string):
        return self.__id_with_type(string, GraphEntity.orcid)

    # Lookups restricted to citing resources (those with a cito:cites link).
    def retrieve_citing_from_doi(self, string):
        return self.__id_with_type(
            string.lower(), GraphEntity.doi,
            "?res <%s> ?cited" % GraphEntity.cites)

    def retrieve_citing_from_pmid(self, string):
        return self.__id_with_type(
            string, GraphEntity.pmid,
            "?res <%s> ?cited" % GraphEntity.cites)

    def retrieve_citing_from_pmcid(self, string):
        return self.__id_with_type(
            string, GraphEntity.pmcid,
            "?res <%s> ?cited" % GraphEntity.cites)

    def retrieve_citing_from_url(self, string):
        return self.__id_with_type(
            string.lower(), GraphEntity.url,
            "?res <%s> ?cited" % GraphEntity.cites)

    # Plain identifier lookups (DOIs and URLs compared lowercase).
    def retrieve_from_doi(self, string):
        return self.__id_with_type(string.lower(), GraphEntity.doi)

    def retrieve_from_pmid(self, string):
        return self.__id_with_type(string, GraphEntity.pmid)

    def retrieve_from_pmcid(self, string):
        return self.__id_with_type(string, GraphEntity.pmcid)

    def retrieve_from_url(self, string):
        return self.__id_with_type(string.lower(), GraphEntity.url)

    def retrieve_from_issn(self, string):
        return self.__id_with_type(string, GraphEntity.issn)

    def retrieve_from_isbn(self, string):
        return self.__id_with_type(string, GraphEntity.isbn)

    def retrieve_issue_from_journal(self, id_dict, issue_id, volume_id):
        """Return issue 'issue_id' of the journal identified by 'id_dict',
        scoped to volume 'volume_id' when that is not None."""
        if volume_id is None:
            return self.__retrieve_from_journal(
                id_dict, GraphEntity.journal_issue, issue_id)
        else:
            retrieved_volume = self.retrieve_volume_from_journal(
                id_dict, volume_id)
            if retrieved_volume is not None:
                query = """
                    SELECT DISTINCT ?br WHERE {
                      ?br a <%s> ;
                          <%s> <%s> ;
                          <%s> "%s"
                    } LIMIT 1
                """ % (GraphEntity.journal_issue, GraphEntity.part_of,
                       str(retrieved_volume),
                       GraphEntity.has_sequence_identifier, issue_id)
                return self.__query(query)

    def retrieve_volume_from_journal(self, id_dict, volume_id):
        return self.__retrieve_from_journal(
            id_dict, GraphEntity.journal_volume, volume_id)

    def retrieve_br_url(self, res, string):
        return self.__retrieve_res_id_by_type(res, string.lower(),
                                              GraphEntity.url)

    def retrieve_br_doi(self, res, string):
        return self.__retrieve_res_id_by_type(res, string.lower(),
                                              GraphEntity.doi)

    def retrieve_br_pmid(self, res, string):
        return self.__retrieve_res_id_by_type(res, string, GraphEntity.pmid)

    def retrieve_br_pmcid(self, res, string):
        return self.__retrieve_res_id_by_type(res, string, GraphEntity.pmcid)

    def retrieve_last_snapshot(self, prov_subj):
        """Return the not-yet-invalidated (i.e. latest) snapshot of
        'prov_subj', or None."""
        query = """
            SELECT DISTINCT ?se WHERE {
              ?se <%s> <%s> .
              FILTER NOT EXISTS { ?se <%s> ?ca }
            } LIMIT 1
        """ % (ProvEntity.specialization_of, str(prov_subj),
               ProvEntity.was_invalidated_by)
        return self.__query(query)

    def __retrieve_res_id_by_type(self, res, id_string, id_type):
        """Return the identifier entity of 'res' matching the given scheme
        and literal value, or None."""
        if id_string is not None:
            query = """
                SELECT DISTINCT ?id WHERE {
                  <%s> <%s> ?id .
                  ?id <%s> <%s> ;
                      <%s> "%s"
                }""" % (res, GraphEntity.has_identifier,
                        GraphEntity.uses_identifier_scheme, id_type,
                        GraphEntity.has_literal_value, id_string)
            return self.__query(query)

    def __retrieve_from_journal(self, id_dict, part_type, part_seq_id):
        """Return the journal part (volume/issue) with sequence id
        'part_seq_id', trying every id in 'id_dict'.

        BUGFIX: the original returned unconditionally on the first id, so
        remaining identifiers were never tried when that lookup found
        nothing; all ids are now tried (consistent with retrieve()).
        """
        for id_type in id_dict:
            for id_string in id_dict[id_type]:
                query = """
                    SELECT DISTINCT ?res WHERE {
                      ?j <%s> ?id .
                      ?id <%s> <%s> ;
                          <%s> "%s" .
                      ?res a <%s> ;
                           <%s>+ ?j ;
                           <%s> "%s"
                    }""" % (GraphEntity.has_identifier,
                            GraphEntity.uses_identifier_scheme, id_type,
                            GraphEntity.has_literal_value, id_string,
                            part_type, GraphEntity.part_of,
                            GraphEntity.has_sequence_identifier, part_seq_id)
                res = self.__query(query)
                if res is not None:
                    return res

    def __id_with_type(self, id_string, id_type, extras=""):
        """Return a resource identified by ('id_type', 'id_string'); 'extras'
        may add SPARQL patterns further constraining ?res. None if absent."""
        query = """
            SELECT DISTINCT ?res WHERE {
              ?res <%s> ?id .
              ?id <%s> <%s> ;
                  <%s> "%s" .
              %s
            }""" % (GraphEntity.has_identifier,
                    GraphEntity.uses_identifier_scheme, id_type,
                    GraphEntity.has_literal_value, id_string, extras)
        return self.__query(query)

    def __query(self, query):
        """Run 'query' against the triplestore (when configured) and then
        the local graph; return the first binding, or None."""
        if self.ts is not None:
            result = self.ts.query(query)
            for res, in result:
                return res
        # If nothing has been returned, check if there is something
        # in the current graph set
        result = self.g.query(query)
        for res, in result:
            return res
def __post_init__(self):
    """Finish dataclass construction: open the embedded Veusz window,
    attach persistence, and build the initial pages."""
    window = veusz.Embedded(name=self.title, hidden=self.hidden)
    window.EnableToolbar()
    self.g = window
    self.storer = Storer(exit_dump=False)
    self.init_pages()
from telegram import User from telegram.ext import Updater, CommandHandler import logging import checker import os from time import time, ctime from storer import Storer from cardinfo import CardInfo, ThresholdExceedListener from userinfo import UserInfo STORED_FILE = os.getenv('STRELKA_STORED_FILE', 'strelka_bot_shelve.db') TOKEN_FILENAME = os.getenv('STRELKA_TOKEN_FILE', 'token.lst') BALANCE_CHECK_INTERVAL_SEC = 3600 # 1 hour users = {} storer = Storer(STORED_FILE) job_queue = None # Enable Logging logging.basicConfig( format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO) logger = logging.getLogger(__name__) def get_description(): return """/help - Show help /getcardbalance - Returns balance for specified card /addcard - Add a card to the list of registered cards /removecard - Remove a card to the list of registered cards /getcards - Returns balance for all registered cards
def test_get_item_bpz2(self):
    """ Test get item procedure """
    store = Storer(dump_path=PATH_DUMPS,
                   dump_name=DUMP_NAME,
                   verbose=VERBOSE,
                   _test=True)
    store.put(1, name="one")
    store.put(2, name="two")
    # A key that was never stored must come back as False.
    three = store.get("three")
    assert three == False  # "Should be False!"
    store.put(3, name="three")
    store.dump()  # here is new data in storer
    # After the put + dump the same key must yield its value.
    three = store.get("three")
    assert three == 3, f"got something different: [{three}]"  # "Should be 3!"
    store._cleanup()