def fs_ugraph_start_job(dataset, U, stats, options):
    """Computes the feature set for the undirected graph U and saves the stats."""

    features = [
        # fs = feature set
        f_global_clustering,
        # f_local_clustering,
        # f_avg_shortest_path,
    ]

    if not args['from_file']:
        db = SqliteHelper()

    for ftr in features:
        ftr(U, stats, options)

    if not args['print_stats'] and not args['from_file']:
        db.save_stats(dataset, stats)
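# A minimal sketch of the feature-function contract assumed above: every entry in
# `features` is callable as ftr(graph, stats, options) and writes its result into
# the shared stats dict. Hypothetical body, assuming graph-tool as the graph library:
from graph_tool.clustering import global_clustering

def f_global_clustering_sketch(U, stats, options):
    # global_clustering() returns (clustering coefficient, standard deviation)
    coefficient, stddev = global_clustering(U)
    stats['global_clustering'] = coefficient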
class IpDataHelper(SqliteHelper):
    '''
    Proxy IP data helper.
    Columns: ip, port, types, protocol, country, area, speed
    '''

    def __init__(self):
        self.sqlhelper = SqliteHelper(DB_CONFIG_FILE)
        self.index = 0

    def initdb(self):
        self.sqlhelper = SqliteHelper(DB_CONFIG_FILE)

    def create(self):
        create_table_sql = '''CREATE TABLE %s (
            `id` int(10) NOT NULL,
            `ip` varchar(16) DEFAULT NULL,
            `port` int DEFAULT NULL,
            `types` int DEFAULT NULL,
            `protocol` int DEFAULT NULL,
            `country` varchar(100) DEFAULT NULL,
            `area` varchar(100) DEFAULT NULL,
            `speed` int DEFAULT NULL,
            `score` int DEFAULT NULL,
            PRIMARY KEY (`id`)
            )''' % DB_CONFIG_TABLE
        self.sqlhelper.create(create_table_sql)

    def insert(self, data):
        '''
        Insert items.
        :param data: data tuple
        :return:
        '''
        save_sql = 'INSERT INTO %s values (?, ?, ?, ?, ?, ?, ?, ?, ?)' % DB_CONFIG_TABLE
        self.sqlhelper.insert(save_sql, data)
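# Usage sketch for IpDataHelper, assuming DB_CONFIG_FILE and DB_CONFIG_TABLE point
# at a valid SQLite config. The value order must match the CREATE TABLE columns:
# (id, ip, port, types, protocol, country, area, speed, score). Whether insert()
# expects one tuple or a list of row tuples depends on SqliteHelper's implementation;
# a list (executemany-style) is assumed here.
helper = IpDataHelper()
helper.create()
helper.insert([(1, '127.0.0.1', 8080, 0, 0, 'CN', 'Beijing', 5, 10)])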
def fs_digraph_start_job(dataset, D, stats, options):
    """Computes the feature set for the directed graph D and saves the stats."""

    features = [
        # fs = feature set
        fs_digraph_using_basic_properties,
        fs_digraph_using_degree,
        fs_digraph_using_indegree,
        f_centralization,
        f_reciprocity,
        f_pseudo_diameter,
        f_local_clustering,
        f_pagerank,
        f_eigenvector_centrality,
    ]

    if not args['from_file']:
        db = SqliteHelper()

    for ftr in features:
        ftr(D, stats, options)

    if not args['print_stats'] and not args['from_file']:
        db.save_stats(dataset, stats)
    required=False,
    type=int,
    default=1,
    help='Number of CPU cores/datasets to use in parallel for preparation. Handy when working with multiple datasets. Default 1. Max 20.')

# args is available globally
args = vars(parser.parse_args()).copy()

log.info('graph.tasks.prepare: Welcome')

# option 2
if args['from_db']:
    log.info('Requested to prepare graph from db')
    db = SqliteHelper()

    # respect --use-datasets argument
    log.debug('Configured datasets: ' + ', '.join(args['from_db']))
    datasets = db.get_datasets_and_formats(args['from_db'])
else:
    log.info('Requested to prepare graph from file')
    datasets = args['from_file']  # argparse returns [[..], [..], ..]

    # flatten the 2-d array and check its length (see the illustration below)
    datasets_flat = [nested for dataset in datasets for nested in dataset]

    if len(datasets_flat) < 3:
        log.error('No datasets specified or wrong parameter format, exiting. \n\n\tPlease specify exactly as follows: --from-file <name> <filename> <format> [--from-file ...]\n\n\tname\t: name of the dataset, i.e., corresponding folder in dumps/, e.g. worldbank-linked-data\n\tfilename: the name of the file in the corresponding folder (may be an archive)\n\tformat\t: one of %s\n' % ','.join(SHORT_FORMAT_MAP.keys()))
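# Illustration of the flattening and validation above: with repeatable
# '--from-file <name> <filename> <format>' arguments, argparse returns a list of
# lists, which the comprehension collapses into one flat list. Hypothetical input:
datasets = [['worldbank-linked-data', 'data.nt.gz', 'ntriples']]
datasets_flat = [nested for dataset in datasets for nested in dataset]
# -> ['worldbank-linked-data', 'data.nt.gz', 'ntriples']; len >= 3, so the check passes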
def __init__(self):
    self.sqlhelper = SqliteHelper(DB_CONFIG_FILE)
    self.index = 0
class QSDataHelper(SqliteHelper):
    def __init__(self):
        self.sqlhelper = SqliteHelper(DB_CONFIG_FILE)
        self.index = 0

    def create(self):
        create_table_sql = '''CREATE TABLE %s (
            `id` int(11) NOT NULL,
            `md5` varchar(32) DEFAULT NULL,
            `author` varchar(20) DEFAULT NULL,
            `content` varchar(500) DEFAULT NULL,
            `pic` varchar(200) DEFAULT NULL,
            PRIMARY KEY (`id`)
            )''' % DB_CONFIG_TABLE
        self.sqlhelper.create(create_table_sql)

    def insert(self, data):
        '''
        Insert items.
        :param data: data tuple
        :return:
        '''
        save_sql = 'INSERT INTO %s values (?, ?, ?, ?, ?)' % DB_CONFIG_TABLE
        self.sqlhelper.insert(save_sql, data)

    def update(self, rows=None):
        '''
        Update items.
        :param rows:
        :return:
        '''
        if rows:
            for item in rows:
                # note: the new values are interpolated directly into the SQL string,
                # only the id is passed as a bound parameter
                update_sql = '''UPDATE %(table)s SET \
                    md5 = '%(md5)s', \
                    author = '%(author)s', \
                    content = '%(content)s' WHERE \
                    ID = ?''' % {
                    'table': DB_CONFIG_TABLE,
                    'md5': item['md5'],
                    'author': item['author'],
                    'content': item['content']
                }
                data = [(item['id'], )]
                self.sqlhelper.update(update_sql, data)

    def delete(self, conditions=None):
        '''
        Delete the given items, or all items when no conditions are passed.
        :return:
        '''
        if conditions is None:
            update_sql = 'DELETE FROM %s' % DB_CONFIG_TABLE
            self.sqlhelper.delete(update_sql)
        else:
            for item in conditions:
                update_sql = 'DELETE FROM %s WHERE id = ?' % DB_CONFIG_TABLE
                data = [(item['id'], )]
                self.sqlhelper.delete(update_sql, data)

    def get_diff_items_num(self):
        '''
        Get the duplicated items (same md5) and their counts from the database.
        :return: num
        '''
        sql = 'SELECT MIN(id), md5, COUNT(md5) FROM {} GROUP BY md5 HAVING COUNT(md5) > 1'.format(
            DB_CONFIG_TABLE)
        return self.sqlhelper.excu_select(sql)

    def get_diff_items(self):
        '''
        Get the distinct items from the database.
        :return:
        '''
        sql = 'SELECT id, md5, COUNT(md5) FROM {} GROUP BY md5'.format(
            DB_CONFIG_TABLE)
        return self.sqlhelper.excu_select(sql)

    def delete_repeated_items(self):
        '''
        Delete duplicated items (same md5), keeping the row with the smallest id.
        :return:
        '''
        sql = 'DELETE FROM {} WHERE id NOT IN ' \
              '(SELECT MIN(id) FROM {} GROUP BY md5)'. \
            format(DB_CONFIG_TABLE, DB_CONFIG_TABLE)
        return self.sqlhelper.excu(sql)

    def get_max_data_id(self):
        '''
        Get the max id from the data.
        :return:
        '''
        sql = 'SELECT MAX(id) FROM {}'.format(DB_CONFIG_TABLE)
        return self.sqlhelper.excu_select(sql)

    def fetchonebymd5(self, md5):
        '''
        Get the item from the data by md5.
        :return:
        '''
        sql = 'SELECT * FROM {} WHERE md5 = ?'.format(DB_CONFIG_TABLE)
        return self.sqlhelper.fetchone(sql, md5)

    def get_a_item(self):
        '''
        Return one item from the database, cycling through the ids in ascending order.
        :return:
        '''
        sql_get_ids = 'SELECT id FROM {} ORDER BY id ASC'.format(
            DB_CONFIG_TABLE)
        sql_get_item = 'SELECT * FROM {} WHERE id = ?'.format(DB_CONFIG_TABLE)
        try:
            ids = self.sqlhelper.excu_select(sql_get_ids)
            self.total = len(ids)
            id = self.index % self.total
            item = self.sqlhelper.fetchone(sql_get_item, ids[id][0])
            self.index += 1
            return dict(id=item[0], md5=item[1], author=item[2], content=item[3])
        except Exception as why:
            print(why)
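# Usage sketch for QSDataHelper, assuming DB_CONFIG_FILE and DB_CONFIG_TABLE are
# configured. delete_repeated_items() removes md5 duplicates, keeping the row with
# the smallest id; get_a_item() then cycles through the remaining rows.
qs = QSDataHelper()
qs.delete_repeated_items()
item = qs.get_a_item()  # e.g. {'id': 1, 'md5': '...', 'author': '...', 'content': '...'}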
from db.SqliteHelper import SqliteHelper

log = logging.getLogger(__name__)

# ----------------

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='lodcc')
    parser.add_argument('--init-db', '-dbi', action="store_true", help='Initialize the database before running')
    parser.add_argument('--limit', '-l', type=int, required=False, default=-1, help='Limit the number of datasets to process. Default -1 (no limit)')

    args = vars(parser.parse_args()).copy()
    db = SqliteHelper(init_db=args['init_db'])

    datasets = db.get_datasets(columns=['id', 'url', 'name'], limit=args['limit'])

    for ds in datasets:
        res = get_parse_datapackage(ds[0], ds[1], ds[2])

        for r in res:
            # r is a tuple of shape (id, name, attribute, value)
            db.save_attribute(r)
def initdb(self):
    self.sqlhelper = SqliteHelper(DB_CONFIG_FILE)
parser.add_argument(
    '--dump-graph',
    '-gd',
    required=False,
    # note: argparse's type=bool does not parse 'False'; see the str2bool sketch below
    type=bool,
    default=True,
    help='Dumps the instantiated graph from the edgelist (csv) as an optimized binary archive that is preferred in future analyses. Default True.')

# args is available globally
args = vars(parser.parse_args()).copy()

if args['from_db']:
    log.info('Requested to prepare graph from db')
    db = SqliteHelper()

    # respect --use-datasets argument
    log.debug('Configured datasets: ' + ', '.join(args['from_db']))
    datasets = db.get_datasets_and_paths(args['from_db'])
else:
    datasets = args['from_file']  # argparse returns [[..], [..]]
    datasets = list(map(
        # to stay compatible with the existing build_graph function, transform each list into a dict
        lambda ds: {
            'name': ds[0],
            'path_edgelist': 'dumps/%s/data.edgelist.csv' % ds[0],
            'path_graph_gt': 'dumps/%s/data.graph.gt.gz' % ds[0]
        },
        datasets))
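# Caveat on --dump-graph above: argparse's type=bool calls bool() on the raw
# string, so '--dump-graph False' still evaluates to True. A minimal sketch of
# the usual workaround (str2bool is a hypothetical helper, not in this codebase):
def str2bool(value):
    # map common textual representations onto booleans
    if value.lower() in ('true', '1', 'yes'):
        return True
    if value.lower() in ('false', '0', 'no'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected, got %r' % value)

# parser.add_argument('--dump-graph', '-gd', type=str2bool, default=True, ...)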