コード例 #1
0
def align_years(years, rep_type, in_dir, out_dir, count_dir, min_count,
                **rep_args):
    """Sequentially align per-year embeddings and write each result to disk.

    For every entry of ``years`` (in iteration order) a representation is
    loaded with ``create_representation``, aligned to the previous year's
    aligned embedding with ``alignment.smart_procrustes_align``, and saved.
    The first year is written unchanged and becomes the initial base.
    Progress messages are printed to standard output.

    Args:
        years: Iterable of year identifiers; ``str(year)`` is appended to the
            input and output prefixes.
        rep_type: Forwarded to ``create_representation``.
        in_dir: Prefix concatenated with ``str(year)`` to form the path
            handed to ``create_representation``.
        out_dir: Prefix concatenated with ``str(year)``; the files
            ``<prefix>-w.npy`` and ``<prefix>-vocab.pkl`` are written.
        count_dir: Forwarded to ``words_above_count``.
        min_count: Forwarded to ``words_above_count``.
        **rep_args: Extra keyword arguments for ``create_representation``.

    Returns:
        None.
    """
    base_embed = None  # aligned embedding of the previous year, if any
    for year in years:
        # Single-argument print() emits the same text on Python 2 and 3.
        print("Loading year: %s" % (year,))
        year_embed = create_representation(rep_type, in_dir + str(year),
                                           **rep_args)
        # Words whose count passes the min_count threshold for this year.
        year_words = words_above_count(count_dir, year, min_count)
        # NOTE(review): the return value is discarded. If get_subembed
        # returns a new embedding instead of filtering in place, this call
        # has no effect on year_embed -- confirm against its implementation.
        year_embed.get_subembed(year_words)
        print("Aligning year: %s" % (year,))
        if base_embed is None:
            # First year: nothing to align against, so it is the base as-is.
            aligned_embed = year_embed
        else:
            aligned_embed = alignment.smart_procrustes_align(
                base_embed, year_embed)
        base_embed = aligned_embed
        print("Writing year: %s" % (year,))
        foutname = out_dir + str(year)
        np.save(foutname + "-w.npy", aligned_embed.m)
        write_pickle(aligned_embed.iw, foutname + "-vocab.pkl")
コード例 #2
0
    def get_aligned(self, normalize=False):
        """Build a sequentially aligned version of this embedding series.

        Walks ``self.embeds`` in iteration order. The first embedding is kept
        unchanged; every later one is passed to ``smart_procrustes_align``
        together with the previously aligned result, using
        ``post_normalize=False``.

        Args:
            normalize: Accepted for interface compatibility; not used by
                this method body.

        Returns:
            The object produced by ``SequentialEmbedding.from_ordered_dict``
            from the ordered year -> aligned-embedding mapping.
        """
        aligned_by_year = collections.OrderedDict()
        previous = None
        for position, (year, embed) in enumerate(self.embeds.iteritems()):
            if position == 0:
                # Nothing precedes the first entry, so it anchors the chain.
                aligned = embed
            else:
                aligned = smart_procrustes_align(previous, embed, post_normalize=False)
            aligned_by_year[year] = aligned
            previous = aligned

        return SequentialEmbedding.from_ordered_dict(aligned_by_year)
コード例 #3
0
ファイル: seq_procrustes.py プロジェクト: viveksck/langchange
def align_years(years):
    """Sequentially align per-year embeddings and write each result to disk.

    For every entry of ``years`` (in iteration order) an embedding is loaded
    from ``INPUT_FILE.format(year=year)``, aligned to the previous year's
    aligned embedding with ``alignment.smart_procrustes_align``, and saved
    under ``OUTPUT_FILE.format(year=year)``: the matrix ``.m`` goes to
    ``<name>.npy`` and the space-joined ``.iw`` entries to ``<name>.vocab``.
    The first year is written unchanged and becomes the initial base.
    Progress messages are printed to standard output.

    Args:
        years: Iterable of year identifiers substituted into the
            ``INPUT_FILE`` / ``OUTPUT_FILE`` templates.

    Returns:
        None.
    """
    base_embed = None  # aligned embedding of the previous year, if any
    for year in years:
        # Single-argument print() emits the same text on Python 2 and 3.
        print("Loading year: %s" % (year,))
        year_embed = embedding.Embedding.load(INPUT_FILE.format(year=year))
        print("Aligning year: %s" % (year,))
        if base_embed is None:
            # First year: nothing to align against, so it is the base as-is.
            aligned_embed = year_embed
        else:
            aligned_embed = alignment.smart_procrustes_align(base_embed, year_embed)
        base_embed = aligned_embed
        print("Writing year: %s" % (year,))
        foutname = OUTPUT_FILE.format(year=year)
        np.save(foutname + ".npy", aligned_embed.m)
        # open() replaces the file() builtin, which no longer exists in
        # Python 3; write() with an explicit newline matches `print >> outf`.
        with open(foutname + ".vocab", "w") as outf:
            outf.write(" ".join(aligned_embed.iw) + "\n")
コード例 #4
0
ファイル: seq_procrustes.py プロジェクト: viveksck/langchange
def align_years(years):
    """Align each year's embedding to its predecessor and persist the result.

    Years are processed in iteration order. Each embedding is loaded from
    ``INPUT_FILE.format(year=year)``. The first one is used unchanged; every
    later one is aligned to the previous aligned embedding through
    ``alignment.smart_procrustes_align``. For each year the matrix ``.m`` is
    saved to ``<OUTPUT_FILE formatted>.npy`` and the ``.iw`` entries, joined
    by single spaces and followed by a newline, to
    ``<OUTPUT_FILE formatted>.vocab``. Progress is printed to stdout.

    Args:
        years: Iterable of year identifiers substituted into the
            ``INPUT_FILE`` / ``OUTPUT_FILE`` templates.

    Returns:
        None.
    """
    base_embed = None  # most recent aligned embedding; None before year one
    for year in years:
        # One pre-formatted string per print() keeps output identical on
        # Python 2 and Python 3.
        print("Loading year: %s" % (year,))
        year_embed = embedding.Embedding.load(INPUT_FILE.format(year=year))
        print("Aligning year: %s" % (year,))
        if base_embed is None:
            aligned_embed = year_embed
        else:
            aligned_embed = alignment.smart_procrustes_align(
                base_embed, year_embed)
        base_embed = aligned_embed
        print("Writing year: %s" % (year,))
        foutname = OUTPUT_FILE.format(year=year)
        np.save(foutname + ".npy", aligned_embed.m)
        vocab_line = " ".join(aligned_embed.iw) + "\n"
        # The file() builtin was removed in Python 3; open() works on both.
        with open(foutname + ".vocab", "w") as outf:
            outf.write(vocab_line)
コード例 #5
0
def align_years(years, rep_type, in_dir, out_dir, count_dir, min_count, **rep_args):
    """Chain-align yearly embeddings and save every aligned year.

    Each year's representation is created with ``create_representation``
    from ``in_dir + str(year)``. The first year is saved unchanged; each
    following year is aligned to the preceding aligned embedding with
    ``alignment.smart_procrustes_align``. Output goes to
    ``out_dir + str(year)`` with the suffixes ``-w.npy`` (matrix ``.m``)
    and ``-vocab.pkl`` (``.iw``).

    Args:
        years: Iterable of year identifiers.
        rep_type: Forwarded to ``create_representation``.
        in_dir: Input path prefix.
        out_dir: Output path prefix.
        count_dir: Forwarded to ``words_above_count``.
        min_count: Forwarded to ``words_above_count``.
        **rep_args: Extra keyword arguments for ``create_representation``.

    Returns:
        None.
    """
    previous = None
    for position, year in enumerate(years):
        print("Loading year:", year)
        current = create_representation(rep_type, in_dir + str(year), **rep_args)
        frequent_words = words_above_count(count_dir, year, min_count)
        # Return value intentionally left unused, exactly as before.
        current.get_subembed(frequent_words)
        print("Aligning year:", year)
        if position > 0:
            # Every year after the first is aligned to its predecessor.
            current = alignment.smart_procrustes_align(previous, current)
        previous = current
        print("Writing year:", year)
        prefix = out_dir + str(year)
        np.save(prefix + "-w.npy", current.m)
        write_pickle(current.iw, prefix + "-vocab.pkl")
コード例 #6
0
ファイル: seq_procrustes.py プロジェクト: LinguList/histwords
def align_years(years, rep_type, in_dir, out_dir, count_dir, min_count, **rep_args):
    """Align yearly embeddings one after another and store each result.

    The representation for each year is created with
    ``create_representation(rep_type, in_dir + str(year), **rep_args)``.
    The first year is stored unchanged; each subsequent year is aligned to
    the previously aligned embedding via
    ``alignment.smart_procrustes_align``. For each year the matrix ``.m`` is
    saved to ``<out_dir><year>-w.npy`` and ``.iw`` is pickled to
    ``<out_dir><year>-vocab.pkl``. Progress is printed to stdout.

    Args:
        years: Iterable of year identifiers.
        rep_type: Forwarded to ``create_representation``.
        in_dir: Input path prefix (concatenated with ``str(year)``).
        out_dir: Output path prefix (concatenated with ``str(year)``).
        count_dir: Forwarded to ``words_above_count``.
        min_count: Forwarded to ``words_above_count``.
        **rep_args: Extra keyword arguments for ``create_representation``.

    Returns:
        None.
    """
    base_embed = None  # None until the first year has been processed
    for year in years:
        # print() with one formatted string behaves the same on Python 2
        # and 3; the bare print statement is a SyntaxError on Python 3.
        print("Loading year: %s" % (year,))
        year_embed = create_representation(rep_type, in_dir + str(year), **rep_args)
        year_words = words_above_count(count_dir, year, min_count)
        # NOTE(review): result is not assigned. Verify that get_subembed
        # filters year_embed in place; otherwise this line is a no-op.
        year_embed.get_subembed(year_words)
        print("Aligning year: %s" % (year,))
        if base_embed is None:
            aligned_embed = year_embed
        else:
            aligned_embed = alignment.smart_procrustes_align(base_embed, year_embed)
        base_embed = aligned_embed
        print("Writing year: %s" % (year,))
        foutname = out_dir + str(year)
        np.save(foutname + "-w.npy", aligned_embed.m)
        write_pickle(aligned_embed.iw, foutname + "-vocab.pkl")
コード例 #7
0
def align_years(years, rep_type, in_dir, out_dir, **rep_args):
    """Sequentially align per-year embeddings and write each result to disk.

    For every entry of ``years`` (in iteration order) a representation is
    loaded with ``create_representation``. The first year is kept unchanged;
    each later year is aligned to the previous aligned embedding with
    ``alignment.smart_procrustes_align(..., post_normalize=False)``. The
    matrix ``.m`` is saved to ``<out_dir><year>-w.npy`` and ``.iw`` is
    pickled to ``<out_dir><year>-vocab.pkl``. Progress is printed to stdout.

    Args:
        years: Iterable of year identifiers.
        rep_type: Forwarded to ``create_representation``.
        in_dir: Input path prefix (concatenated with ``str(year)``).
        out_dir: Output path prefix (concatenated with ``str(year)``).
        **rep_args: Extra keyword arguments for ``create_representation``.

    Returns:
        None.
    """
    base_embed = None  # aligned embedding of the previous year, if any
    for year in years:
        # Single-argument print() emits the same text on Python 2 and 3.
        print("Loading year: %s" % (year,))
        year_embed = create_representation(
            rep_type, in_dir + str(year),
            **rep_args)
        print("Aligning year: %s" % (year,))
        if base_embed is None:
            # First year: there is no earlier embedding to align to.
            aligned_embed = year_embed
        else:
            # Align this year to the previous year's aligned embedding.
            aligned_embed = alignment.smart_procrustes_align(
                base_embed, year_embed,
                post_normalize=False)
        base_embed = aligned_embed
        print("Writing year: %s" % (year,))
        foutname = out_dir + str(year)
        np.save(foutname + "-w.npy", aligned_embed.m)
        write_pickle(aligned_embed.iw, foutname + "-vocab.pkl")