def update(self): """ Prepare for the update, getting graph and update_data. Then do the update, producing triples :return: list(graph, graph): The add and sub graphs for performing the update """ from vivopump import read_csv, get_graph from rdflib import Graph import logging import os.path import time logging.basicConfig(level=logging.INFO) if self.update_data is None: # Test for injection self.update_data = read_csv(self.out_filename, delimiter=self.inter) # Narrow the update_def to include only columns that appear in the update_data new_update_columns = {} for name, path in self.update_def['column_defs'].items(): if name in self.update_data[self.update_data.keys()[0]].keys(): new_update_columns[name] = path self.update_def['column_defs'] = new_update_columns if self.original_graph is None: # Test for injection # Create the original graph from VIVO self.original_graph = get_graph(self.update_def, self.query_parms, debug=self.verbose) self.update_graph = Graph() for s, p, o in self.original_graph: self.update_graph.add((s, p, o)) if self.verbose: print datetime.now(), 'Graphs ready for processing. Original has ', len(self.original_graph), \ '. Update graph has', len(self.update_graph) print datetime.now(), 'Updates ready for processing. ', len( self.update_data), 'rows.' if len(self.enum) == 0: print datetime.now(), "No enumerations" else: for key in self.enum.keys(): print datetime.now(), key, "modified", time.ctime(os.path.getmtime(key)), \ "get", len(self.enum[key]['get']), "update", \ len(self.enum[key]['update']) return self.__do_update()
def update(self, filename=None, inter='\t', intra=';'): """ Prepare for the update, getting graph and update_data. Then do the update, producing triples """ from vivopump import read_csv, get_graph from rdflib import Graph import logging self.intra = intra self.inter = inter logging.basicConfig(level=logging.INFO) if filename is not None: self.out_filename = filename if self.update_data is None: # Test for injection self.update_data = read_csv(self.out_filename, delimiter=inter) # Narrow the update_def to include only columns that appear in the update_data new_update_columns = {} for name, path in self.update_def['column_defs'].items(): if name in self.update_data[1].keys(): new_update_columns[name] = path self.update_def['column_defs'] = new_update_columns self.enum = load_enum(self.update_def) if self.original_graph is None: # Test for injection self.original_graph = get_graph( self.update_def, debug=self.verbose) # Create the original graph from VIVO self.update_graph = Graph() for s, p, o in self.original_graph: self.update_graph.add((s, p, o)) if self.verbose: print datetime.now(), 'Graphs ready for processing. Original has ', len(self.original_graph), \ '. Update graph has', len(self.update_graph) print datetime.now(), 'Updates ready for processing. ', len( self.update_data), 'rows.' if len(self.enum) == 0: print datetime.now(), "No enumerations" else: for key in self.enum.keys(): print datetime.now(), key, "get", len(self.enum[key]['get']), "update", \ len(self.enum[key]['update']) return self.do_update()
def update(self, filename=None, inter='\t', intra=';'): """ Prepare for the update, getting graph and update_data. Then do the update, producing triples """ from vivopump import read_csv, get_graph from rdflib import Graph import logging self.intra = intra self.inter = inter logging.basicConfig(level=logging.INFO) if filename is not None: self.out_filename = filename if self.update_data is None: # Test for injection self.update_data = read_csv(self.out_filename, delimiter=inter) # Narrow the update_def to include only columns that appear in the update_data new_update_columns = {} for name, path in self.update_def['column_defs'].items(): if name in self.update_data[1].keys(): new_update_columns[name] = path self.update_def['column_defs'] = new_update_columns self.enum = load_enum(self.update_def) if self.original_graph is None: # Test for injection self.original_graph = get_graph(self.update_def, debug=self.verbose) # Create the original graph from VIVO self.update_graph = Graph() for s, p, o in self.original_graph: self.update_graph.add((s, p, o)) if self.verbose: print datetime.now(), 'Graphs ready for processing. Original has ', len(self.original_graph), \ '. Update graph has', len(self.update_graph) print datetime.now(), 'Updates ready for processing. ', len(self.update_data), 'rows.' if len(self.enum) == 0: print datetime.now(), "No enumerations" else: for key in self.enum.keys(): print datetime.now(), key, "get", len(self.enum[key]['get']), "update", \ len(self.enum[key]['update']) return self.do_update()
def update(self): """ Prepare for the update, getting graph and update_data. Then do the update, producing triples :return: list(graph, graph): The add and sub graphs for performing the update """ from vivopump import read_csv, get_graph from rdflib import Graph import logging import os.path import time logging.basicConfig(level=logging.INFO) if self.update_data is None: # Test for injection self.update_data = read_csv(self.out_filename, delimiter=self.inter) # Narrow the update_def to include only columns that appear in the update_data new_update_columns = {} for name, path in self.update_def['column_defs'].items(): if name in self.update_data[self.update_data.keys()[0]].keys(): new_update_columns[name] = path self.update_def['column_defs'] = new_update_columns if self.original_graph is None: # Test for injection # Create the original graph from VIVO self.original_graph = get_graph(self.update_def, self.query_parms, debug=self.verbose) self.update_graph = Graph() for s, p, o in self.original_graph: self.update_graph.add((s, p, o)) if self.verbose: print datetime.now(), 'Graphs ready for processing. Original has ', len(self.original_graph), \ '. Update graph has', len(self.update_graph) print datetime.now(), 'Updates ready for processing. ', len(self.update_data), 'rows.' if len(self.enum) == 0: print datetime.now(), "No enumerations" else: for key in self.enum.keys(): print datetime.now(), key, "modified", time.ctime(os.path.getmtime(key)), \ "get", len(self.enum[key]['get']), "update", \ len(self.enum[key]['update']) return self.__do_update()
def update(self):
    """
    Get the update data and the original graph ready, then run the update.

    Values already present on the instance (update_data, original_graph) are used as they
    are; anything still None is loaded here.  Progress is reported through the logger.

    :return: list(graph, graph): The add and sub graphs for performing the update
    """
    from vivopump import read_csv, get_graph
    from rdflib import Graph
    import os.path
    import time

    # Read the update file only when no data was injected
    if self.update_data is None:
        self.update_data = read_csv(self.out_filename, delimiter=self.inter)

    # Keep only the column definitions whose names occur in the update data
    self.update_def['column_defs'] = {
        column: definition
        for column, definition in self.update_def['column_defs'].items()
        if len(self.update_data) > 0 and column in self.update_data[self.update_data.keys()[0]].keys()
    }

    # Fetch the original graph from VIVO only when none was injected
    if self.original_graph is None:
        self.original_graph = get_graph(self.update_def, self.query_parms)

    # The update graph begins as a triple-by-triple copy of the original
    self.update_graph = Graph()
    for subject, predicate, obj in self.original_graph:
        self.update_graph.add((subject, predicate, obj))

    original_size = len(self.original_graph)
    update_size = len(self.update_graph)
    logger.info(u'Graphs ready for processing. Original has {} triples. Update graph has {} triples.'.format(
        original_size, update_size))

    row_count = len(self.update_data)
    logger.info(u'Updates ready for processing. {} rows in update.'.format(row_count))

    if len(self.enum) > 0:
        for key in self.enum.keys():
            # The enum key doubles as the path whose modification time is reported
            modified = time.ctime(os.path.getmtime(key))
            get_count = len(self.enum[key]['get'])
            update_count = len(self.enum[key]['update'])
            logger.info(
                u"Enumeration {} modified {}. {} entries in get enum. {} entries in update enum".format(
                    key, modified, get_count, update_count))
    else:
        logger.info(u"No enumerations")

    return self.__do_update()
def update(self):
    """
    Prepare for the update, getting graph and update_data.  Then do the update, producing triples

    update_data and original_graph are only loaded here when they are still None, so a caller
    (e.g. a test) may set them beforehand.  The update definition is narrowed to the columns
    found in the update data, and update_graph is rebuilt as a copy of original_graph.

    :return: list(graph, graph): The add and sub graphs for performing the update
    """
    from vivopump import read_csv, get_graph
    from rdflib import Graph
    import os.path
    import time

    if self.update_data is None:  # Test for injection
        self.update_data = read_csv(self.out_filename, delimiter=self.inter)

    # Narrow the update_def to include only columns that appear in the update_data.
    # The column names are read from a single row (assumes all rows share the same
    # columns -- TODO confirm).  With no rows there is nothing to sample, so no column
    # is kept rather than failing with an IndexError on keys()[0].
    if len(self.update_data) > 0:
        data_columns = self.update_data[self.update_data.keys()[0]].keys()
    else:
        data_columns = []
    new_update_columns = {}
    for name, path in self.update_def['column_defs'].items():
        if name in data_columns:
            new_update_columns[name] = path
    self.update_def['column_defs'] = new_update_columns

    if self.original_graph is None:  # Test for injection
        # Create the original graph from VIVO
        self.original_graph = get_graph(self.update_def, self.query_parms)

    # The update graph starts as a triple-by-triple copy of the original graph
    self.update_graph = Graph()
    for s, p, o in self.original_graph:
        self.update_graph.add((s, p, o))

    logger.info(u'Graphs ready for processing. Original has {} triples. Update graph has {} triples.'.format(
        len(self.original_graph), len(self.update_graph)))
    logger.info(u'Updates ready for processing. {} rows in update.'.format(len(self.update_data)))
    if len(self.enum) == 0:
        logger.info(u"No enumerations")
    else:
        # Each enum key is handed to os.path.getmtime, i.e. it is used as a file path
        for key in self.enum.keys():
            logger.info(
                u"Enumeration {} modified {}. {} entries in get enum. {} entries in update enum".format(
                    key, time.ctime(os.path.getmtime(key)), len(self.enum[key]['get']),
                    len(self.enum[key]['update'])))
    return self.__do_update()
def test_normal_case(self): update_def = read_update_def('data/grant_def.json') a = get_graph(update_def) print len(a) self.assertTrue(len(a) == 241611)