Exemple #1
0
def fs_ugraph_start_job(dataset, U, stats, options):
    """Run the configured feature functions on the graph ``U``.

    Every feature is called as ``feature(U, stats, options)``.  After each
    feature has run, ``stats`` is saved for ``dataset`` through
    ``SqliteHelper.save_stats`` -- unless the global ``args`` has
    ``print_stats`` or ``from_file`` set, in which case nothing is stored.
    """
    # fs = feature set
    feature_set = [
        f_global_clustering,
        # currently disabled: f_local_clustering, f_avg_shortest_path
    ]

    # A database helper is only needed when the job was not started from a file.
    if not args['from_file']:
        db = SqliteHelper()

    for feature in feature_set:
        feature(U, stats, options)

        # Skip persisting when stats are only printed or the input is a file.
        if args['print_stats'] or args['from_file']:
            continue
        db.save_stats(dataset, stats)
class IpDataHelper(SqliteHelper):
    '''
    Helper for the proxy-IP table named by DB_CONFIG_TABLE.

    Columns written by create(): id, ip, port, types, protocol, country,
    area, speed, score.  Within this class, self.sqlhelper is only assigned
    by initdb(), so initdb() has to run before create() or insert().
    '''

    def __init__(self):
        # Counter attribute; starts at zero for every new helper.
        self.index = 0

    def initdb(self):
        '''
        open the SqliteHelper on DB_CONFIG_FILE and keep it on the instance
        '''
        self.sqlhelper = SqliteHelper(DB_CONFIG_FILE)

    def __del__(self):
        # Only unbinds the local name; no resources are released here.
        del self

    def create(self):
        '''
        create the proxy-IP table through the wrapped SqliteHelper
        '''
        table_ddl = '''CREATE TABLE %s (
                                      `id` int(10) NOT NULL,
                                      `ip` varchar(16) DEFAULT NULL,
                                      `port` int DEFAULT NULL,
                                      `types` int DEFAULT NULL,
                                      `protocol` int DEFAULT NULL,
                                      `country` varchar(100) DEFAULT NULL,
                                      `area` varchar(100) DEFAULT NULL,
                                      `speed` int DEFAULT NULL,
                                      `score` int DEFAULT NULL,
                                       PRIMARY KEY (`id`)
                                    )'''
        statement = table_ddl % DB_CONFIG_TABLE
        self.sqlhelper.create(statement)

    def insert(self, data):
        '''
        insert rows into the table
        :param data: values for the nine placeholders, passed on unchanged
        :return:
        '''
        insert_template = 'INSERT INTO %s values (?, ?, ?, ?, ?, ?, ?, ?, ?)'
        statement = insert_template % DB_CONFIG_TABLE
        self.sqlhelper.insert(statement, data)
class IpDataHelper(SqliteHelper):
    '''
    Access helper for proxy-IP records stored in the DB_CONFIG_TABLE table.

    The table holds: id, ip, port, types, protocol, country, area, speed and
    score.  initdb() is the only place in this class that sets
    self.sqlhelper, so call it before create() or insert().
    '''

    def __init__(self):
        # Counter attribute, reset for each new instance.
        self.index = 0

    def initdb(self):
        '''
        attach a SqliteHelper opened on DB_CONFIG_FILE
        '''
        self.sqlhelper = SqliteHelper(DB_CONFIG_FILE)

    def __del__(self):
        # Drops the local reference only; nothing else is cleaned up.
        del self

    def create(self):
        '''
        issue the CREATE TABLE statement for the proxy-IP table
        '''
        ddl_template = '''CREATE TABLE %s (
                                      `id` int(10) NOT NULL,
                                      `ip` varchar(16) DEFAULT NULL,
                                      `port` int DEFAULT NULL,
                                      `types` int DEFAULT NULL,
                                      `protocol` int DEFAULT NULL,
                                      `country` varchar(100) DEFAULT NULL,
                                      `area` varchar(100) DEFAULT NULL,
                                      `speed` int DEFAULT NULL,
                                      `score` int DEFAULT NULL,
                                       PRIMARY KEY (`id`)
                                    )'''
        ddl = ddl_template % DB_CONFIG_TABLE
        self.sqlhelper.create(ddl)

    def insert(self, data):
        '''
        store rows in the table
        :param data: values bound to the nine "?" placeholders
        :return:
        '''
        sql_template = 'INSERT INTO %s values (?, ?, ?, ?, ?, ?, ?, ?, ?)'
        sql = sql_template % DB_CONFIG_TABLE
        self.sqlhelper.insert(sql, data)
Exemple #4
0
def fs_digraph_start_job(dataset, D, stats, options):
    """Apply the feature set below to the graph ``D``, one feature at a time.

    Each entry is invoked as ``feature(D, stats, options)``.  Once a feature
    returns, ``stats`` is stored for ``dataset`` via
    ``SqliteHelper.save_stats``; storing is skipped whenever the global
    ``args`` has ``print_stats`` or ``from_file`` set.
    """
    # fs = feature set
    feature_set = [
        fs_digraph_using_basic_properties,
        fs_digraph_using_degree,
        fs_digraph_using_indegree,
        f_centralization,
        f_reciprocity,
        f_pseudo_diameter,
        f_local_clustering,
        f_pagerank,
        f_eigenvector_centrality,
    ]

    # The database helper is created only for runs that are not file based.
    if not args['from_file']:
        db = SqliteHelper()

    for feature in feature_set:
        feature(D, stats, options)

        # Nothing is persisted for print-only or file-based runs.
        if args['print_stats'] or args['from_file']:
            continue
        db.save_stats(dataset, stats)
Exemple #5
0
        required=False,
        type=int,
        default=1,
        help=
        'Number of CPU cores/datasets to use in parallel for preparation. Handy when working with multiple datasets. Default 1. Max 20.'
    )

    # args is available globally: a plain dict copy of the parsed CLI namespace
    args = vars(parser.parse_args()).copy()

    log.info('graph.tasks.prepare: Welcome')

    # option 2: datasets are looked up in the database when from_db is given
    if args['from_db']:
        log.info('Requested to prepare graph from db')
        db = SqliteHelper()

        # log the names passed via from_db, then resolve them (with formats)
        log.debug('Configured datasets: ' + ', '.join(args['from_db']))
        datasets = db.get_datasets_and_formats(args['from_db'])
    else:
        log.info('Requested to prepare graph from file')
        datasets = args['from_file']  # argparse returns [[..], [..],..]

        # flattens the 2-d array and checks length
        # NOTE(review): the "== 0" test is already covered by "< 3"
        datasets_flat = [nested for dataset in datasets for nested in dataset]
        if len( datasets_flat ) == 0 \
            or len( datasets_flat ) < 3:
            # fewer than three values cannot form one <name> <filename> <format> triple
            log.error(
                'No datasets specified or wrong parameter format, exiting. \n\n\tPlease specify exactly as follows: --from-file <name> <filename> <format> [--from-file ...]\n\n\tname\t: name of the dataset, i.e., corresponding folder in dumps/, e.g. worldbank-linked-data\n\tfilename: the name of the file in the corresponding folder (may be an archive)\n\tformat\t: one of %s\n'
                % ','.join(SHORT_FORMAT_MAP.keys()))
 def __init__(self):
     """Open a ``SqliteHelper`` on ``DB_CONFIG_FILE`` and reset ``index`` to 0."""
     self.sqlhelper = SqliteHelper(DB_CONFIG_FILE)
     self.index = 0
class QSDataHelper(SqliteHelper):
    '''
    Data-access helper for the table named by DB_CONFIG_TABLE.

    Rows have the columns id, md5, author, content and pic (see create()).
    Every statement is delegated to a SqliteHelper opened on DB_CONFIG_FILE
    and kept in self.sqlhelper.
    '''

    def __init__(self):
        self.sqlhelper = SqliteHelper(DB_CONFIG_FILE)
        # Cursor used by get_a_item() to cycle through the stored rows.
        self.index = 0

    def __del__(self):
        # Only unbinds the local name; no resources are released here.
        # NOTE(review): presumably present to override a base-class
        # finalizer -- confirm against SqliteHelper before removing.
        del self

    def create(self):
        '''
        create the table through the wrapped SqliteHelper
        :return:
        '''
        create_table_sql = '''CREATE TABLE %s (
                                      `id` int(11) NOT NULL,
                                      `md5` varchar(32) DEFAULT NULL,
                                      `author` varchar(20) DEFAULT NULL,
                                      `content` varchar(500) DEFAULT NULL,
                                      `pic` varchar(200) DEFAULT NULL,
                                       PRIMARY KEY (`id`)
                                    )''' % DB_CONFIG_TABLE
        self.sqlhelper.create(create_table_sql)

    def insert(self, data):
        '''
        insert items
        :param data: values for the five placeholders, passed on unchanged
        :return:
        '''
        save_sql = 'INSERT INTO %s values (?, ?, ?, ?, ?)' % DB_CONFIG_TABLE
        self.sqlhelper.insert(save_sql, data)

    def update(self, rows=None):
        '''
        update md5, author and content of existing items, matched by id
        :param rows: iterable of mappings with the keys id, md5, author
                     and content; nothing happens when it is empty or None
        :return:
        '''
        if not rows:
            return

        # The values are bound as parameters instead of being formatted into
        # the statement, so quotes inside md5/author/content can neither break
        # the SQL nor inject into it.  Only the table name is interpolated.
        update_sql = ('UPDATE {} SET md5 = ?, author = ?, content = ? '
                      'WHERE ID = ?').format(DB_CONFIG_TABLE)
        for item in rows:
            data = [
                (item['md5'], item['author'], item['content'], item['id']),
            ]
            self.sqlhelper.update(update_sql, data)

    def delete(self, conditions=None):
        '''
        delete items
        :param conditions: iterable of mappings with an id key; when None,
                           every row of the table is deleted
        :return:
        '''
        if conditions is None:
            delete_sql = 'DELETE FROM %s ' % DB_CONFIG_TABLE
            self.sqlhelper.delete(delete_sql)
            return

        delete_sql = 'DELETE FROM %s WHERE id = ?' % DB_CONFIG_TABLE
        for item in conditions:
            data = [
                (item['id'], ),
            ]
            self.sqlhelper.delete(delete_sql, data)

    def get_diff_items_num(self):
        '''
        select (MIN(id), md5, COUNT(md5)) for every md5 stored more than once
        :return: whatever the helper's excu_select returns for the query
        '''
        sql = 'SELECT MIN(id), md5, COUNT(md5) FROM {} GROUP by md5 HAVING COUNT(md5) > 1'.format(
            DB_CONFIG_TABLE)
        return self.sqlhelper.excu_select(sql)

    def get_diff_items(self):
        '''
        select (id, md5, COUNT(md5)) grouped by md5, one row per md5 value
        :return: whatever the helper's excu_select returns for the query
        '''
        sql = 'SELECT id, md5, COUNT(md5) FROM {} GROUP by md5'.format(
            DB_CONFIG_TABLE)
        return self.sqlhelper.excu_select(sql)

    def delete_repeated_items(self):
        '''
        delete duplicated items, keeping the row with the lowest id per md5
        :return: whatever the helper's excu returns for the statement
        '''
        # The previous statement restricted the "keep" set with
        # HAVING COUNT(md5) > 1, which also deleted every row whose md5 was
        # NOT duplicated.  Keeping MIN(id) of every md5 group removes only the
        # surplus copies.
        sql = 'DELETE FROM {table} WHERE id NOT IN ' \
              '(SELECT MIN(id) FROM {table} GROUP BY md5)'. \
            format(table=DB_CONFIG_TABLE)
        return self.sqlhelper.excu(sql)

    def get_max_data_id(self):
        '''
        select the highest id in the table
        :return: whatever the helper's excu_select returns for the query
        '''
        sql = 'SELECT MAX(id) FROM {}'.format(DB_CONFIG_TABLE)
        return self.sqlhelper.excu_select(sql)

    def fetchonebymd5(self, md5):
        '''
        fetch one item by its md5
        :param md5: value bound to the md5 placeholder
        :return: whatever the helper's fetchone returns for the query
        '''
        sql = 'SELECT * FROM {} WHERE md5 = ?'.format(DB_CONFIG_TABLE)
        return self.sqlhelper.fetchone(sql, md5)

    def get_a_item(self):
        '''
        return the next item in ascending id order, wrapping around at the end
        :return: dict with id, md5, author and content; None when the table
                 is empty or the lookup fails
        '''
        sql_get_ids = 'SELECT id FROM {} ORDER BY id ASC'.format(
            DB_CONFIG_TABLE)
        sql_get_item = 'SELECT * FROM {} WHERE id = ?'.format(DB_CONFIG_TABLE)
        try:
            ids = self.sqlhelper.excu_select(sql_get_ids)
            self.total = len(ids)
            if not self.total:
                # Empty table: nothing to hand out (this used to surface as a
                # swallowed ZeroDivisionError from the modulo below).
                return None
            # self.index grows without bound; the modulo maps it onto a row.
            position = self.index % self.total
            item = self.sqlhelper.fetchone(sql_get_item, ids[position][0])
            self.index += 1
            return dict(id=item[0],
                        md5=item[1],
                        author=item[2],
                        content=item[3])
        except Exception as why:
            # Best-effort lookup: report the problem and signal "no item".
            # "except ... as" and str() work on both Python 2.6+ and Python 3.
            print(str(why))
            return None
Exemple #8
0
from db.SqliteHelper import SqliteHelper

log = logging.getLogger(__name__)

# ----------------

# Script entry point: load datasets from the database, parse each one's
# datapackage and store the resulting attributes.
if __name__ == '__main__':

    parser = argparse.ArgumentParser(description='lodcc')

    parser.add_argument('--init-db', '-dbi', action="store_true", help='')
    parser.add_argument('--limit',
                        '-l',
                        type=int,
                        required=False,
                        default=-1,
                        help='')

    # plain dict copy of the parsed CLI namespace
    args = vars(parser.parse_args()).copy()
    db = SqliteHelper(init_db=args['init_db'])

    # fetch the (id, url, name) columns of the datasets, honouring --limit
    datasets = db.get_datasets(columns=['id', 'url', 'name'],
                               limit=args['limit'])

    for ds in datasets:
        # ds is indexed in the column order requested above: id, url, name
        res = get_parse_datapackage(ds[0], ds[1], ds[2])

        for r in res:
            # r is a tuple of shape (id,name,attribute,value)
            db.save_attribute(r)
 def initdb(self):
     """Create a ``SqliteHelper`` for ``DB_CONFIG_FILE`` and store it on ``self.sqlhelper``."""
     self.sqlhelper = SqliteHelper(DB_CONFIG_FILE)
Exemple #10
0
    # --dump-graph takes an explicit value.  argparse's ``type=bool`` would
    # call bool() on the raw string, so "--dump-graph False" (any non-empty
    # text) ended up as True.  The converter below treats the usual "false"
    # spellings -- and, as before, the empty string -- as False and every
    # other value as True.  The default is not a string and is used as is.
    parser.add_argument(
        '--dump-graph',
        '-gd',
        required=False,
        type=lambda value: str(value).strip().lower() not in
        ('', '0', 'false', 'f', 'no', 'n', 'off'),
        default=True,
        help=
        'Dumps the instantiated graph from the edgelist (csv) as a optimized binary archive that is preferred in future analyses. Defaut True.'
    )

    # args is available globally: a plain dict copy of the parsed CLI namespace
    args = vars(parser.parse_args()).copy()

    # datasets come either from the database (from_db) or from the CLI (from_file)
    if args['from_db']:
        log.info('Requested to prepare graph from db')
        db = SqliteHelper()

        # log the names passed via from_db, then resolve them (with paths)
        log.debug('Configured datasets: ' + ', '.join(args['from_db']))
        datasets = db.get_datasets_and_paths(args['from_db'])
    else:
        datasets = args['from_file']  # argparse returns [[..], [..]]
        # only the first value of each entry (the name) is used; both paths
        # are derived from it under dumps/<name>/
        datasets = list(
            map(
                lambda ds:
                {  # to be compatible with existing build_graph function we transform the array to a dict
                    'name': ds[0],
                    'path_edgelist': 'dumps/%s/data.edgelist.csv' % ds[0],
                    'path_graph_gt': 'dumps/%s/data.graph.gt.gz' % ds[0]
                },
                datasets))
Exemple #11
0
 def initdb(self):
     """Create a ``SqliteHelper`` for ``DB_CONFIG_FILE`` and store it on ``self.sqlhelper``."""
     self.sqlhelper = SqliteHelper(DB_CONFIG_FILE)