def fs_ugraph_start_job(dataset, U, stats, options):
    """Run the undirected-graph feature set over graph U.

    Each feature callable mutates `stats` in place; results are persisted
    via SqliteHelper unless the run came from --from-file or only printing
    was requested. Relies on the module-global `args` dict.
    """
    # fs = feature set; disabled members kept as a reminder of optional metrics
    features = [
        f_global_clustering,
        #f_local_clustering,
        # f_avg_shortest_path,
    ]

    # a db handle is only needed when we are not operating on plain files
    use_db = not args['from_file']
    if use_db:
        db = SqliteHelper()

    for feature in features:
        feature(U, stats, options)

    # persist unless stats were only meant to be printed
    if use_db and not args['print_stats']:
        db.save_stats(dataset, stats)
def fs_digraph_start_job(dataset, D, stats, options):
    """Run the directed-graph feature set over graph D.

    Each feature callable mutates `stats` in place; results are persisted
    via SqliteHelper unless the run came from --from-file or only printing
    was requested. Relies on the module-global `args` dict.
    """
    # fs = feature set
    features = [
        fs_digraph_using_basic_properties,
        fs_digraph_using_degree,
        fs_digraph_using_indegree,
        f_centralization,
        f_reciprocity,
        f_pseudo_diameter,
        f_local_clustering,
        f_pagerank,
        f_eigenvector_centrality,
    ]

    # a db handle is only needed when we are not operating on plain files
    use_db = not args['from_file']
    if use_db:
        db = SqliteHelper()

    for feature in features:
        feature(D, stats, options)

    # persist unless stats were only meant to be printed
    if use_db and not args['print_stats']:
        db.save_stats(dataset, stats)
required=False, type=int, default=1, help= 'Number of CPU cores/datasets to use in parallel for preparation. Handy when working with multiple datasets. Default 1. Max 20.' ) # args is available globaly args = vars(parser.parse_args()).copy() log.info('graph.tasks.prepare: Welcome') # option 2 if args['from_db']: log.info('Requested to prepare graph from db') db = SqliteHelper() # respect --use-datasets argument log.debug('Configured datasets: ' + ', '.join(args['from_db'])) datasets = db.get_datasets_and_formats(args['from_db']) else: log.info('Requested to prepare graph from file') datasets = args['from_file'] # argparse returns [[..], [..],..] # flattens the 2-d array and checks length datasets_flat = [nested for dataset in datasets for nested in dataset] if len( datasets_flat ) == 0 \ or len( datasets_flat ) < 3: log.error( 'No datasets specified or wrong parameter format, exiting. \n\n\tPlease specify exactly as follows: --from-file <name> <filename> <format> [--from-file ...]\n\n\tname\t: name of the dataset, i.e., corresponding folder in dumps/, e.g. worldbank-linked-data\n\tfilename: the name of the file in the corresponding folder (may be an archive)\n\tformat\t: one of %s\n' % ','.join(SHORT_FORMAT_MAP.keys()))
def __init__(self):
    """Set up the SQLite helper (using DB_CONFIG_FILE) and a zeroed index."""
    self.index = 0
    self.sqlhelper = SqliteHelper(DB_CONFIG_FILE)
from db.SqliteHelper import SqliteHelper

log = logging.getLogger(__name__)

# ----------------

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='lodcc')
    parser.add_argument('--init-db', '-dbi', action="store_true", help='')
    parser.add_argument('--limit', '-l', type=int, required=False, default=-1, help='')
    args = vars(parser.parse_args()).copy()

    db = SqliteHelper(init_db=args['init_db'])

    #
    datasets = db.get_datasets(columns=['id', 'url', 'name'], limit=args['limit'])

    # parse every dataset's datapackage and store the extracted attributes
    for row in datasets:
        attributes = get_parse_datapackage(row[0], row[1], row[2])
        for attribute in attributes:
            # attribute is a tuple of shape (id,name,attribute,value)
            db.save_attribute(attribute)
def initdb(self):
    """(Re)create the SQLite helper this instance talks to."""
    self.sqlhelper = SqliteHelper(DB_CONFIG_FILE)