コード例 #1
0
ファイル: test_graph.py プロジェクト: flaviovdf/tag_assess
 def setUp(self):
     '''
     Fills `self.annots` with every annotation parsed from the file at
     `test.SMALL_DEL_FILE` using the delicious/flickr line parser.
     '''
     self.annots = []
     id_parser = data_parser.Parser()
     with open(test.SMALL_DEL_FILE) as handle:
         parsed = id_parser.iparse(handle,
                                   data_parser.delicious_flickr_parser)
         # Same effect as appending one annotation at a time.
         self.annots.extend(parsed)
コード例 #2
0
 def create_annots(self, fpath):
     '''
     Parses the file at `fpath` with the delicious/flickr line parser and
     returns the resulting annotations as a list.
     '''
     id_parser = data_parser.Parser()
     with open(fpath) as handle:
         # Materialise inside the `with` so the file is still open while
         # the parse iterator is being consumed.
         return list(id_parser.iparse(handle,
                                      data_parser.delicious_flickr_parser))
コード例 #3
0
ファイル: ParseFile.py プロジェクト: flaviovdf/tag_assess
def _write_ids(fpath, ids):
    '''
    Writes one line per key of the `ids` mapping to the text file `fpath`.

    Keys are visited in increasing order of their mapped id; each line holds
    `key[1]` followed by the id.
    '''
    with open(fpath, 'w') as out_f:
        for key in sorted(ids, key=ids.__getitem__):
            print(key[1], ids[key], file=out_f)


def main(args=None):
    '''
    Parses an annotation file, stores the annotations in a database file and
    writes the user, item and tag ids to text files.

    Arguments:
    args -- argv-like sequence:
            [program, annotation_file, database_file, ids_folder, ftype].
            `ftype` must be one of flickr, delicious, bibsonomy, connotea,
            citeulike or lt.

    Returns 1 (after printing a message to stderr) when the arguments are
    missing or `ftype` is unknown; returns None on success.
    '''
    if args is None:
        args = []

    # Five entries are required (program name plus four arguments), since
    # args[4] is read below.
    if len(args) < 5:
        types = '{flickr, delicious, bibsonomy, connotea, citeulike, lt}'
        # Fall back to the interpreter's argv when no program name is given.
        prog = args[0] if args else sys.argv[0]
        print('Usage %s %s %s %s %s' %
              (prog, '<annotation_file>', '<database_file>', '<ids folder>',
               '<ftype = %s>' % types),
              file=sys.stderr)
        return 1

    func_map = {
        'bibsonomy': data_parser.bibsonomy_parser,
        'citeulike': data_parser.citeulike_parser,
        'connotea': data_parser.connotea_parser,
        'delicious': data_parser.delicious_flickr_parser,
        'flickr': data_parser.delicious_flickr_parser,
        'lt': data_parser.library_thing_parser
    }

    in_fpath = args[1]
    db_fpath = args[2]
    ids_folder = args[3]
    func_name = args[4]

    if func_name not in func_map:
        # Errors go to stderr, like the usage message above.
        print('ftype %s unknown' % func_name, file=sys.stderr)
        return 1
    parse_func = func_map[func_name]

    # Save the parsed annotations to a table named after the file type.
    parser = data_parser.Parser()
    with open(in_fpath) as annotf, AnnotWriter(db_fpath) as writer:
        writer.create_table(func_name)

        for annotation in parser.iparse(annotf, parse_func, sys.stderr):
            writer.append_row(annotation)

    # Save the ids collected by the parser, one text file per id kind.
    _write_ids(os.path.join(ids_folder, func_name + '.user'),
               parser.user_ids)
    _write_ids(os.path.join(ids_folder, func_name + '.items'),
               parser.item_ids)
    _write_ids(os.path.join(ids_folder, func_name + '.tags'),
               parser.tag_ids)
コード例 #4
0
def write_good_annots(database, table, new_database, good_items):
    '''
    Copies the annotations of `table` whose item is in `good_items` from
    `database` to a table with the same name in `new_database`.

    The rows are passed through a fresh parser before being written; the
    parser's user, item and tag id mappings are returned as a tuple.
    '''
    with AnnotReader(database) as source, AnnotWriter(new_database) as sink:
        source.change_table(table)
        sink.create_table(table)
        selected = source.iterate(query={'item': {'$in': good_items}})

        id_parser = data_parser.Parser()
        for row in id_parser.iparse(selected, data_parser.json_parser):
            sink.append_row(row)

    return id_parser.user_ids, id_parser.item_ids, id_parser.tag_ids
コード例 #5
0
    def test_metrics_small_file(self):
        '''
        Checks the frequency indexes that `create_metrics_index` builds from
        the annotations parsed out of `test.SMALL_DEL_FILE`, once indexed by
        item and once indexed by user.
        '''
        p = data_parser.Parser()
        with open(test.SMALL_DEL_FILE) as f:
            annots = list(p.iparse(f, data_parser.delicious_flickr_parser))

        item_tag_frequencies, collection_item_frequency, \
            collection_tag_frequency1 = create_metrics_index(annots, 'item', 'tag')

        user_tag_frequencies, collection_user_frequency, \
            collection_tag_frequency2 = create_metrics_index(annots, 'user', 'tag')

        # assertEqual is used throughout: the assertEquals alias is
        # deprecated and no longer exists in recent unittest versions.

        # Collection-wide frequency of each item id.
        self.assertEqual(collection_item_frequency[0], 5)
        self.assertEqual(collection_item_frequency[1], 1)
        self.assertEqual(collection_item_frequency[2], 2)
        self.assertEqual(collection_item_frequency[3], 1)
        self.assertEqual(collection_item_frequency[4], 1)

        # Collection-wide frequency of each user id.
        self.assertEqual(collection_user_frequency[0], 4)
        self.assertEqual(collection_user_frequency[1], 4)
        self.assertEqual(collection_user_frequency[2], 2)

        # Tag frequencies for item 0.
        self.assertEqual(item_tag_frequencies[0][0], 2)
        self.assertEqual(item_tag_frequencies[0][1], 1)
        self.assertEqual(item_tag_frequencies[0][2], 0)
        self.assertEqual(item_tag_frequencies[0][3], 1)
        self.assertEqual(item_tag_frequencies[0][4], 1)
        self.assertEqual(item_tag_frequencies[0][5], 0)

        # Tag frequencies for user 2.
        self.assertEqual(user_tag_frequencies[2][0], 0)
        self.assertEqual(user_tag_frequencies[2][1], 0)
        self.assertEqual(user_tag_frequencies[2][2], 0)
        self.assertEqual(user_tag_frequencies[2][3], 0)
        self.assertEqual(user_tag_frequencies[2][4], 1)
        self.assertEqual(user_tag_frequencies[2][5], 1)

        # Collection-wide tag frequencies must not depend on whether the
        # index was built per item or per user.
        self.assertEqual(collection_tag_frequency1[0], 3)
        self.assertEqual(collection_tag_frequency1[1], 3)
        self.assertEqual(collection_tag_frequency1[2], 1)
        self.assertEqual(collection_tag_frequency1[3], 1)
        self.assertEqual(collection_tag_frequency1[4], 1)
        self.assertEqual(collection_tag_frequency1[5], 1)
        self.assertEqual(collection_tag_frequency1, collection_tag_frequency2)
コード例 #6
0
    def base_tfunc(self, fpath, parse_func):
        '''
        Writes the annotations parsed from `fpath` (using `parse_func`) to the
        h5 file at `self.h5_file`, reads them back and checks that what was
        read equals what was written.

        Each annotation is compared as a tuple of its sorted items.
        '''
        parser = data_parser.Parser()
        written_list = []
        with open(fpath) as in_f:
            with annotations.AnnotWriter(self.h5_file) as writer:
                writer.create_table('bibs')
                for annot in parser.iparse(in_f, parse_func):
                    written_list.append(tuple(sorted(annot.items())))
                    writer.append_row(annot)

        with annotations.AnnotReader(self.h5_file) as reader:
            reader.change_table('bibs')
            read_list = [tuple(sorted(annot.items()))
                         for annot in reader.iterate()]

        # Compare the number of rows read back against the number written.
        # The previous check compared a line counter with len(written_list),
        # which grow in lockstep and therefore could never differ.
        self.assertEqual(len(written_list), len(read_list))
        self.assertEqual(read_list, written_list)
コード例 #7
0
    def test_iparse(self):
        '''
        Feeds the five DELICIOUS_LINE* constants to `Parser.iparse` through an
        in-memory file and checks the user, item and tag ids (and, for the
        first two lines, the date) of each resulting annotation.
        '''
        fakef = StringIO.StringIO()
        fakef.writelines([
            DELICIOUS_LINE1 + '\n',
            DELICIOUS_LINE2 + '\n',
            DELICIOUS_LINE3 + '\n',
            DELICIOUS_LINE4 + '\n',
            DELICIOUS_LINE5,
        ])
        fakef.seek(0)

        id_parser = data_parser.Parser()
        annots = list(
            id_parser.iparse(fakef, data_parser.delicious_flickr_parser))

        # Expected (user, item, tag, date) per annotation, in input order.
        # A date of None means the date of that annotation is not checked.
        expected_rows = [
            (0, 0, 0, '2003-01-01 01:00:00'),
            (0, 1, 1, '2011-02-17 11:10:20'),
            (1, 1, 2, None),
            (2, 1, 0, None),
            (3, 2, 3, None),
        ]

        for idx, (user, item, tag, date) in enumerate(expected_rows):
            self.assertEqual(user, annots[idx]['user'])
            self.assertEqual(item, annots[idx]['item'])
            self.assertEqual(tag, annots[idx]['tag'])
            if date is not None:
                self.assertEqual(date, convert_time(annots[idx]['date']))
コード例 #8
0
 def test_with_file(self):
     '''
     Parses `test.BIBSONOMY_FILE` with the bibsonomy line parser and checks
     that exactly 10000 annotations are produced.
     '''
     p = data_parser.Parser()
     with open(test.BIBSONOMY_FILE) as f:
         annots = list(p.iparse(f, data_parser.bibsonomy_parser))

     # assertEqual replaces the deprecated assertEquals alias, which no
     # longer exists in recent unittest versions.
     self.assertEqual(10000, len(annots))
コード例 #9
0
 def __init_test(self, annot_file):
     '''
     Appends to `self.annots` every annotation parsed from `annot_file`
     using the delicious/flickr line parser.
     '''
     id_parser = data_parser.Parser()
     with open(annot_file) as handle:
         parsed = id_parser.iparse(handle,
                                   data_parser.delicious_flickr_parser)
         for record in parsed:
             self.annots.append(record)