def main():
    """Compute an LSA (truncated-SVD) model correlating an association space
    with a lexical-semantic space, and save the SVD factors to ``svd.npz``.
    """
    parser = argparse.ArgumentParser(
        description='Computes an LSA model correlating associations and lexsem model.')
    parser.add_argument('--lexspace', '-l', metavar='FILE',
                        help='The input lexical space.')
    parser.add_argument('--assoc', '-a', metavar='FILE',
                        help='The input association space.')
    args = parser.parse_args()

    # L2-normalize both spaces so row magnitudes don't dominate the SVD.
    lex = norm2_matrix(read_vector_file(openfile(args.lexspace)))
    assoc = norm2_matrix(read_vector_file(openfile(args.assoc)))

    # BUG FIX: pd.concat takes a *sequence* of objects as its first argument;
    # the original `pd.concat(lex, assoc, keys=...)` passed `assoc` as the
    # positional `axis` argument and raises a TypeError.
    together = pd.concat([lex, assoc], keys=("lex", "assoc"))

    # `.as_matrix()` was deprecated and removed in pandas 1.0; `.values`
    # is the long-standing equivalent.
    org_matrix = together.values
    U, S, V = svd(org_matrix)
    np.savez("svd.npz", U, S, V)
def main():
    """Build an LSA model by concatenating an association space with a
    lexical-semantic space and factorizing the result with SVD; the
    U/S/V factors are written to ``svd.npz``.
    """
    parser = argparse.ArgumentParser(
        description='Computes an LSA model correlating associations and lexsem model.')
    parser.add_argument('--lexspace', '-l', metavar='FILE',
                        help='The input lexical space.')
    parser.add_argument('--assoc', '-a', metavar='FILE',
                        help='The input association space.')
    args = parser.parse_args()

    # Normalize each space to unit L2 norm before stacking.
    lex = norm2_matrix(read_vector_file(openfile(args.lexspace)))
    assoc = norm2_matrix(read_vector_file(openfile(args.assoc)))

    # BUG FIX: the objects to concatenate must be wrapped in a list; the
    # original call passed `assoc` as pd.concat's second positional
    # parameter (axis), which is a TypeError.
    together = pd.concat([lex, assoc], keys=("lex", "assoc"))

    # `.values` replaces the removed-in-pandas-1.0 `.as_matrix()`.
    org_matrix = together.values
    U, S, V = svd(org_matrix)
    np.savez("svd.npz", U, S, V)
def main():
    """Compute similarity measures over one or more vector spaces and
    correlate them with human compositionality ratings.

    Emits a TSV of merged model/human ratings (unless --no-tsv), optionally
    a correlations table (--corrs) and scatter plots as a PDF (--pdf).
    """
    parser = argparse.ArgumentParser(
        description='Computes correlations with compositionality ratings.')
    parser.add_argument('--input', '-i', action="append", type=openfile,
                        metavar="FILE", help='Input vector space.')
    parser.add_argument('--ratings', '-r', metavar='COMPFILE', type=openfile,
                        help='The compositionality ratings file.')
    parser.add_argument('--self', '-s', action="store_true",
                        help='Whether we should include self-comp ratings.')
    parser.add_argument('--no-tsv', '-T', action="store_true",
                        help="*Don't* output the TSV containing comp and model ratings.")
    parser.add_argument('--corrs', '-c', action="store_true",
                        help='Specifies whether correlations should be computed and outputed.')
    parser.add_argument('--pdf', '-p', metavar="FILE", default=None,
                        help='Output plots as a PDF to the given filename.')
    args = parser.parse_args()

    compratings = pd.read_table(args.ratings)
    if not args.self:
        # Drop rows where a compound is rated against itself.
        compratings = compratings[compratings["compound"] != compratings["const"]]
    word_pairs = set(zip(compratings['compound'], compratings['const']))

    # One (name, normalized space) entry per input file.
    named_vector_spaces = [
        (basename(f.name), norm2_matrix(df_remove_pos(read_vector_file(f))))
        for f in args.input
    ]
    if len(named_vector_spaces) > 1:
        # With several spaces, also evaluate their concatenation.
        names, vses = zip(*named_vector_spaces)
        concat_space = pd.concat(vses, keys=names)
        named_vector_spaces.append(("<concat>", concat_space))

    # Compute the distances, keeping each space's measure independently
    # named as "<space>:<metric>".
    distances = [
        cdm(vs, word_pairs, [DISTANCE_METRIC]).rename(
            columns={DISTANCE_METRIC.name: fn + ":" + DISTANCE_METRIC.name})
        for fn, vs in named_vector_spaces
    ]
    # Join all the per-space distance tables into one.
    joined_measures = reduce(pd.merge, distances).rename(
        columns={"left": "compound", "right": "const"})
    # Finally join the similarity measures with the human ratings.
    dm_and_comp = pd.merge(compratings, joined_measures)

    # Output dm_and_comp unless the user specified not to.
    if not args.no_tsv:
        dm_and_comp.to_csv(sys.stdout, index=False, sep="\t")
    if not args.no_tsv and args.corrs:
        # Visual separator between the TSV dump and the correlations.
        # (Parenthesized form prints identically under Python 2 and 3.)
        print("\n" + "-" * 80 + "\n")
    if args.corrs:
        # FIX: to_csv returns None when given a buffer; the original bound
        # that None to an unused `corrs` variable.
        correlations(dm_and_comp).to_csv(sys.stdout, index=False, sep="\t")
    # Plot the measures if the user asked.
    if args.pdf:
        scatters(dm_and_comp, args.pdf)
def main():
    """Correlate vector-space similarity measures with human
    compositionality ratings.

    Writes the merged ratings table as TSV to stdout (unless --no-tsv),
    optionally followed by correlation statistics (--corrs), and can save
    scatter plots to a PDF (--pdf).
    """
    parser = argparse.ArgumentParser(
        description='Computes correlations with compositionality ratings.')
    parser.add_argument('--input', '-i', action="append", type=openfile,
                        metavar="FILE", help='Input vector space.')
    parser.add_argument('--ratings', '-r', metavar='COMPFILE', type=openfile,
                        help='The compositionality ratings file.')
    parser.add_argument('--self', '-s', action="store_true",
                        help='Whether we should include self-comp ratings.')
    parser.add_argument(
        '--no-tsv', '-T', action="store_true",
        help="*Don't* output the TSV containing comp and model ratings.")
    parser.add_argument(
        '--corrs', '-c', action="store_true",
        help='Specifies whether correlations should be computed and outputed.')
    parser.add_argument('--pdf', '-p', metavar="FILE", default=None,
                        help='Output plots as a PDF to the given filename.')
    args = parser.parse_args()

    compratings = pd.read_table(args.ratings)
    if not args.self:
        # Exclude self-comparison rows (compound rated against itself).
        compratings = compratings[
            compratings["compound"] != compratings["const"]]
    word_pairs = set(zip(compratings['compound'], compratings['const']))

    # Pair every input space with its filename for labelled measures.
    named_vector_spaces = [(basename(f.name),
                            norm2_matrix(df_remove_pos(read_vector_file(f))))
                           for f in args.input]
    if len(named_vector_spaces) > 1:
        # Multiple spaces: additionally evaluate their concatenation.
        names, vses = zip(*named_vector_spaces)
        concat_space = pd.concat(vses, keys=names)
        named_vector_spaces.append(("<concat>", concat_space))

    # Compute distances per space, keeping each measure independently
    # named as "<space>:<metric>".
    distances = [
        cdm(vs, word_pairs, [DISTANCE_METRIC]).rename(
            columns={DISTANCE_METRIC.name: fn + ":" + DISTANCE_METRIC.name})
        for fn, vs in named_vector_spaces
    ]
    # Merge all distance tables into a single frame.
    joined_measures = reduce(pd.merge, distances).rename(columns={
        "left": "compound",
        "right": "const"
    })
    # Join the similarity measures with the human ratings.
    dm_and_comp = pd.merge(compratings, joined_measures)

    # Output dm_and_comp unless the user specified not to.
    if not args.no_tsv:
        dm_and_comp.to_csv(sys.stdout, index=False, sep="\t")
    if not args.no_tsv and args.corrs:
        # Separator line between the TSV dump and the correlations
        # (parenthesized print is identical under Python 2 and 3).
        print("\n" + "-" * 80 + "\n")
    if args.corrs:
        # FIX: drop the dead `corrs =` binding — to_csv returns None when
        # a buffer is supplied.
        correlations(dm_and_comp).to_csv(sys.stdout, index=False, sep="\t")
    # Plot the measures if the user asked.
    if args.pdf:
        scatters(dm_and_comp, args.pdf)