Example #1
0
def fs_ugraph_start_job(dataset, U, stats, options):
    """Run the undirected-graph feature set on U.

    Each feature function mutates `stats` in place; after every feature the
    accumulated stats are persisted to the database unless the global `args`
    flags suppress it (print-only mode or file-based input).
    """

    # fs = feature set
    feature_set = (
        f_global_clustering,  #f_local_clustering,
        # f_avg_shortest_path,
    )

    # a DB handle is only needed when we did not load the graph from a file
    db = None if args['from_file'] else SqliteHelper()

    for compute_feature in feature_set:
        compute_feature(U, stats, options)

        # persist intermediate results so a crash loses at most one feature
        if not (args['print_stats'] or args['from_file']):
            db.save_stats(dataset, stats)
Example #2
0
def fs_digraph_start_job(dataset, D, stats, options):
    """Run the directed-graph feature set on D.

    Each feature function mutates `stats` in place; after every feature the
    accumulated stats are persisted to the database unless the global `args`
    flags suppress it (print-only mode or file-based input).
    """

    # fs = feature set
    feature_set = (
        fs_digraph_using_basic_properties,
        fs_digraph_using_degree,
        fs_digraph_using_indegree,
        f_centralization,
        f_reciprocity,
        f_pseudo_diameter,
        f_local_clustering,
        f_pagerank,
        f_eigenvector_centrality,
    )

    # a DB handle is only needed when we did not load the graph from a file
    db = None if args['from_file'] else SqliteHelper()

    for compute_feature in feature_set:
        compute_feature(D, stats, options)

        # persist intermediate results so a crash loses at most one feature
        if not (args['print_stats'] or args['from_file']):
            db.save_stats(dataset, stats)
Example #3
0
        required=False,
        type=int,
        default=1,
        help=
        'Number of CPU cores/datasets to use in parallel for preparation. Handy when working with multiple datasets. Default 1. Max 20.'
    )

    # args is available globaly
    args = vars(parser.parse_args()).copy()

    log.info('graph.tasks.prepare: Welcome')

    # option 2
    if args['from_db']:
        log.info('Requested to prepare graph from db')
        db = SqliteHelper()

        # respect --use-datasets argument
        log.debug('Configured datasets: ' + ', '.join(args['from_db']))
        datasets = db.get_datasets_and_formats(args['from_db'])
    else:
        log.info('Requested to prepare graph from file')
        datasets = args['from_file']  # argparse returns [[..], [..],..]

        # flattens the 2-d array and checks length
        datasets_flat = [nested for dataset in datasets for nested in dataset]
        if len( datasets_flat ) == 0 \
            or len( datasets_flat ) < 3:
            log.error(
                'No datasets specified or wrong parameter format, exiting. \n\n\tPlease specify exactly as follows: --from-file <name> <filename> <format> [--from-file ...]\n\n\tname\t: name of the dataset, i.e., corresponding folder in dumps/, e.g. worldbank-linked-data\n\tfilename: the name of the file in the corresponding folder (may be an archive)\n\tformat\t: one of %s\n'
                % ','.join(SHORT_FORMAT_MAP.keys()))
 def __init__(self):
     """Set up the SQLite helper and reset the iteration index."""
     # NOTE(review): DB_CONFIG_FILE is defined elsewhere in the project —
     # presumably the DB configuration path; verify against db.SqliteHelper.
     self.sqlhelper = SqliteHelper(DB_CONFIG_FILE)
     # running counter; its consumer is not visible in this chunk
     self.index = 0
Example #5
0
from db.SqliteHelper import SqliteHelper

log = logging.getLogger(__name__)

# ----------------

if __name__ == '__main__':

    # CLI: optional DB (re)initialisation plus a row limit for the query below
    parser = argparse.ArgumentParser(description='lodcc')
    parser.add_argument('--init-db', '-dbi', action="store_true", help='')
    parser.add_argument(
        '--limit', '-l', type=int, required=False, default=-1, help='')

    args = vars(parser.parse_args()).copy()
    db = SqliteHelper(init_db=args['init_db'])

    # fetch the datasets to process; each row carries (id, url, name)
    datasets = db.get_datasets(columns=['id', 'url', 'name'],
                               limit=args['limit'])

    for row in datasets:
        dataset_id, dataset_url, dataset_name = row[0], row[1], row[2]
        parsed = get_parse_datapackage(dataset_id, dataset_url, dataset_name)

        for record in parsed:
            # record is a tuple of shape (id,name,attribute,value)
            db.save_attribute(record)
 def initdb(self):
     """(Re)build the SQLite helper from DB_CONFIG_FILE."""
     # NOTE(review): DB_CONFIG_FILE is defined elsewhere in the project —
     # presumably the DB configuration path; verify against db.SqliteHelper.
     self.sqlhelper = SqliteHelper(DB_CONFIG_FILE)