Example #1
import logging
from datetime import datetime

# `Indexer` is a project-local helper class; a minimal sketch follows this example.
def generate_indexer(usr_dataset, usr_bm_tg, feature_begin, feature_end):
    logging.info('generating indexer ...')
    indexer = Indexer(['user', 'tag', 'bookmark'])
    min_time = 1e30
    max_time = -1

    # Skip the header row; fields are tab-separated, timestamps in milliseconds.
    for line in usr_dataset[1:]:
        line_items = line.split('\t')
        contact_timestamp = float(line_items[2]) / 1000  # ms -> s
        min_time = min(min_time, contact_timestamp)
        max_time = max(max_time, contact_timestamp)
        if feature_begin < contact_timestamp <= feature_end:
            indexer.index('user', line_items[0])
            indexer.index('user', line_items[1])

    for line in usr_bm_tg[1:]:
        line_items = line.split('\t')
        tag_timestamp = float(line_items[3]) / 1000
        if feature_begin < tag_timestamp <= feature_end:
            indexer.index('user', line_items[0])
            indexer.index('bookmark', line_items[1])
            indexer.index('tag', line_items[2])

    with open('delicious/data/metadata.txt', 'w') as output:
        output.write('Nodes:\n')
        output.write('-----------------------------\n')
        output.write('#Users: %d\n' % indexer.indices['user'])
        output.write('#Tags: %d\n' % indexer.indices['tag'])
        output.write('#Bookmarks: %d\n' % indexer.indices['bookmark'])
        output.write('\nEdges:\n')
        output.write('-----------------------------\n')
        output.write('#Contact: %d\n' % (len(usr_dataset) - 1))  # minus header row
        output.write('#Save: %d\n' % (len(usr_bm_tg) - 1))
        output.write('#Attach: %d\n' % (len(usr_bm_tg) - 1))
        output.write('\nTime Span:\n')
        output.write('-----------------------------\n')
        output.write('From: %s\n' % datetime.fromtimestamp(min_time))
        output.write('To: %s\n' % datetime.fromtimestamp(max_time))

    return indexer
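
Examples #1, #3, and #4 all call a project-local Indexer class that these snippets omit. The following is a minimal sketch consistent with how it is used here (a hypothetical reconstruction, not the project's actual code): index() assigns consecutive integer ids per node category, indices tracks the count per category, and mapping backs the membership tests.

class Indexer:
    # Hypothetical sketch inferred from usage in these examples.
    def __init__(self, categories):
        # mapping: category -> {raw key -> integer id}
        self.mapping = {cat: {} for cat in categories}
        # indices: category -> number of ids assigned so far
        self.indices = {cat: 0 for cat in categories}

    def index(self, category, key):
        # Return the id for `key`, assigning the next free id on first sight.
        if key not in self.mapping[category]:
            self.mapping[category][key] = self.indices[category]
            self.indices[category] += 1
        return self.mapping[category][key]

    def get_index(self, category, key):
        # Return the id for `key`, or None if it was never indexed.
        return self.mapping[category].get(key)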
Example #2
# from preprocess.tacotron.audio import inv_spectrogram, save_wav
import numpy as np
import torch
from scipy.io.wavfile import write
from torch.autograd import Variable
from preprocess.tacotron.mcep import mc2wav
# Hps, Solver, and this example's data-loading Indexer are project-local
# (note: distinct from the graph-building Indexer of Examples #1, #3, and #4).

if __name__ == '__main__':
    feature = 'sp'  # set to 'mc' to exercise the mel-cepstrum branch below
    hps = Hps()
    hps.load('./hps/v19.json')
    hps_tuple = hps.get_tuple()
    solver = Solver(hps_tuple, None)
    solver.load_model('/storage/model/voice_conversion/v19/model.pkl-59999')
    if feature == 'mc':
        # Use the project's data-loading Indexer to fetch normalized
        # mel-cepstrum features for source and target utterances.
        indexer = Indexer()
        src_mc = indexer.index(speaker_id='225',
                               utt_id='366',
                               dset='test',
                               feature='norm_mc')
        tar_mc = indexer.index(speaker_id='226',
                               utt_id='366',
                               dset='test',
                               feature='norm_mc')
        expand_src_mc = np.expand_dims(src_mc, axis=0)
        expand_tar_mc = np.expand_dims(tar_mc, axis=0)
        src_mc_tensor = torch.from_numpy(expand_src_mc).type(torch.FloatTensor)
        tar_mc_tensor = torch.from_numpy(expand_tar_mc).type(torch.FloatTensor)
        c1 = Variable(torch.from_numpy(np.array([0]))).cuda()
        c2 = Variable(torch.from_numpy(np.array([1]))).cuda()
        results = [src_mc]
        result = solver.test_step(src_mc_tensor, c1)
        # test_step presumably returns a (1, time, dim) NumPy array;
        # drop the batch axis and transpose to (dim, time).
        result = result.squeeze(axis=0).transpose((1, 0))
        results.append(result)
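
One modernization note: torch.autograd.Variable has been a deprecated no-op wrapper since PyTorch 0.4, so on current builds the condition codes above can be created directly as tensors, e.g.:

c1 = torch.tensor([0], device='cuda')  # replaces Variable(torch.from_numpy(...)).cuda()
c2 = torch.tensor([1], device='cuda')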
Example #3
import bisect
import logging

# `parse_term` and the `Paper` record are project-local (a sketch of `Paper`
# follows this example); `path`, used in file names below, is a module-level global.
def generate_papers(datafile, feature_begin, feature_end, observation_begin,
                    observation_end, conf_list):
    logging.info('generating papers ...')

    # try:
    #     result = pickle.load(open('dblp/data/papers_%s.pkl' % path, 'rb'))
    #     return result
    # except IOError:
    #     pass

    indexer = Indexer(['author', 'paper', 'term', 'venue'])

    index, authors, title, year, venue = None, None, None, None, None
    references = []

    # Edge counters for the metadata summary.
    write = 0
    cite = 0
    include = 0
    published = 0

    min_year = 3000
    max_year = 0

    papers_feature_window = []
    papers_observation_window = []

    with open(datafile) as file:
        dataset = file.read().splitlines()

    for line in dataset:
        if not line:
            if year and venue:
                year = int(year)
                if year > 0 and authors and venue in conf_list:
                    min_year = min(min_year, year)
                    max_year = max(max_year, year)
                    authors = authors.split(',')
                    terms = parse_term(title)
                    write += len(authors)
                    cite += len(references)
                    include += len(terms)
                    published += 1

                    p = Paper(year)
                    if feature_begin < year <= feature_end:
                        p.id = indexer.index('paper', index)
                        p.terms = [
                            indexer.index('term', term) for term in terms
                        ]
                        p.references = [
                            indexer.index('paper', paper_id)
                            for paper_id in references
                        ]
                        p.authors = [
                            indexer.index('author', author_name)
                            for author_name in authors
                        ]
                        p.venue = indexer.index('venue', venue)
                        bisect.insort(papers_feature_window, p)
                    elif observation_begin < year <= observation_end:
                        p.references = references
                        p.authors = authors
                        papers_observation_window.append(p)

            index, authors, title, year, venue = None, None, None, None, None
            references = []
        else:
            # DBLP/ArnetMiner format: each line starts with '#' plus a marker:
            # '*' title, '@' authors, 't' year, 'c' venue, 'index' id, '%' reference.
            begin = line[1]
            if begin == '*':
                title = line[2:]
            elif begin == '@':
                authors = line[2:]
            elif begin == 't':
                year = line[2:]
            elif begin == 'c':
                venue = line[2:]
            elif begin == 'i':
                index = line[6:]
            elif begin == '%':
                references.append(line[2:])

    # Remap observation-window papers onto feature-window ids, dropping
    # authors and references never seen during the feature window.
    for p in papers_observation_window:
        authors = []
        references = []
        for author in p.authors:
            author_id = indexer.get_index('author', author)
            if author_id is not None:
                authors.append(author_id)
        for ref in p.references:
            paper_id = indexer.get_index('paper', ref)
            if paper_id is not None:
                references.append(paper_id)
        p.authors = authors
        p.references = references

    with open('dblp/data/metadata_%s.txt' % path, 'w') as output:
        output.write('Nodes:\n')
        output.write('-----------------------------\n')
        output.write('#Authors: %d\n' % indexer.indices['author'])
        output.write('#Papers: %d\n' % indexer.indices['paper'])
        output.write('#Venues: %d\n' % indexer.indices['venue'])
        output.write('#Terms: %d\n' % indexer.indices['term'])
        output.write('\nEdges:\n')
        output.write('-----------------------------\n')
        output.write('#Write: %d\n' % write)
        output.write('#Cite: %d\n' % cite)
        output.write('#Publish: %d\n' % published)
        output.write('#Contain: %d\n' % include)
        output.write('\nTime Span:\n')
        output.write('-----------------------------\n')
        output.write('From: %s\n' % min_year)
        output.write('To: %s\n' % max_year)

    result = papers_feature_window, papers_observation_window, indexer.indices
    # pickle.dump(result, open('dblp/data/papers_%s.pkl' % path, 'wb'))
    return result
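
Example #3 additionally relies on a Paper record that is not shown. Because bisect.insort keeps papers_feature_window sorted, Paper must support ordering; a minimal hypothetical sketch, ordered by year as the usage above suggests:

from functools import total_ordering

@total_ordering
class Paper:
    # Hypothetical sketch inferred from usage above; not the original class.
    def __init__(self, year):
        self.year = year
        self.id = None
        self.terms = []
        self.references = []
        self.authors = []
        self.venue = None

    def __eq__(self, other):
        return self.year == other.year

    def __lt__(self, other):
        return self.year < other.year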
Example #4
# `rating_threshold` and `actor_threshold` are module-level globals in the
# source; `Indexer` is the same helper sketched after Example #1.
def generate_indexer(user_rates_movies_ds, user_tags_movies_ds, movie_actor_ds,
                     movie_director_ds, movie_genre_ds, movie_countries_ds,
                     feature_begin, feature_end):
    logging.info('generating indexer ...')
    min_time = 1e30
    max_time = -1
    indexer = Indexer(
        ['user', 'tag', 'movie', 'actor', 'director', 'genre', 'country'])

    for line in user_rates_movies_ds[1:]:
        line_items = line.split('\t')
        rating_timestamp = float(line_items[3]) / 1000
        min_time = min(min_time, rating_timestamp)
        max_time = max(max_time, rating_timestamp)
        rating = float(line_items[2])
        if feature_begin < rating_timestamp <= feature_end and rating > rating_threshold:
            indexer.index('user', line_items[0])
            indexer.index('movie', line_items[1])

    for line in user_tags_movies_ds[1:]:
        line_items = line.split('\t')
        tag_timestamp = float(line_items[3]) / 1000
        if feature_begin < tag_timestamp <= feature_end:
            indexer.index('user', line_items[0])
            indexer.index('movie', line_items[1])
            indexer.index('tag', line_items[2])

    for line in movie_actor_ds[1:]:
        line_items = line.split('\t')
        ranking = int(line_items[3])
        if (ranking < actor_threshold
                and line_items[0] in indexer.mapping['movie']):
            # indexer.index('movie', line_items[0])
            indexer.index('actor', line_items[1])

    for line in movie_director_ds[1:]:
        line_items = line.split('\t')
        if line_items[0] in indexer.mapping['movie']:
            # indexer.index('movie', line_items[0])
            indexer.index('director', line_items[1])

    for line in movie_genre_ds[1:]:
        line_items = line.split('\t')
        if line_items[0] in indexer.mapping['movie']:
            # indexer.index('movie', line_items[0])
            indexer.index('genre', line_items[1])

    for line in movie_countries_ds[1:]:
        line_items = line.split('\t')
        if line_items[0] in indexer.mapping['movie']:
            # indexer.index('movie', line_items[0])
            indexer.index('country', line_items[1])

    with open('movielens/data/metadata.txt', 'w') as output:
        output.write('Nodes:\n')
        output.write('-----------------------------\n')
        output.write('#Users: %d\n' % indexer.indices['user'])
        output.write('#Tags: %d\n' % indexer.indices['tag'])
        output.write('#Movies: %d\n' % indexer.indices['movie'])
        output.write('#Actors: %d\n' % indexer.indices['actor'])
        output.write('#Directors: %d\n' % indexer.indices['director'])
        output.write('#Genres: %d\n' % indexer.indices['genre'])
        output.write('#Countries: %d\n' % indexer.indices['country'])
        output.write('\nEdges:\n')
        output.write('-----------------------------\n')
        output.write('#Rate: %d\n' % (len(user_rates_movies_ds) - 1))  # minus header row
        output.write('#Attach: %d\n' % (len(user_tags_movies_ds) - 1))
        output.write('#Played_by: %d\n' % (len(movie_actor_ds) - 1))
        output.write('#Directed_by: %d\n' % (len(movie_director_ds) - 1))
        output.write('#Has: %d\n' % (len(movie_genre_ds) - 1))
        output.write('#Produced_in: %d\n' % (len(movie_countries_ds) - 1))
        output.write('\nTime Span:\n')
        output.write('-----------------------------\n')
        output.write('From: %s\n' % datetime.fromtimestamp(min_time))
        output.write('To: %s\n' % datetime.fromtimestamp(max_time))

    return indexer
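
For context, the *_ds arguments are the tab-separated HetRec 2011 MovieLens files read as raw lines with their header row intact (hence the [1:] slices). A hypothetical driver, with file names, threshold values, encoding, and window bounds assumed for illustration:

from datetime import datetime

rating_threshold = 3.0  # assumed value; a module-level global in the source
actor_threshold = 5     # assumed value; likewise a module-level global

def read_lines(file_path):
    # Assumes latin-1 encoded tab-separated files with a header row.
    with open(file_path, encoding='latin-1') as f:
        return f.read().splitlines()

# Feature window in epoch seconds, matching the ms / 1000 conversion above.
feature_begin = datetime(2007, 1, 1).timestamp()
feature_end = datetime(2009, 1, 1).timestamp()

indexer = generate_indexer(
    read_lines('movielens/data/user_ratedmovies-timestamps.dat'),
    read_lines('movielens/data/user_taggedmovies-timestamps.dat'),
    read_lines('movielens/data/movie_actors.dat'),
    read_lines('movielens/data/movie_directors.dat'),
    read_lines('movielens/data/movie_genres.dat'),
    read_lines('movielens/data/movie_countries.dat'),
    feature_begin, feature_end)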