def import_idfs(dirname):
    """Load IDF weights from every entry found in `dirname`.

    Each name returned by ``os.listdir(dirname)`` is joined with `dirname`
    and handed to ``Counters.import_weights`` together with ``weights.idf``.

    Returns a list with one loaded result per directory entry, in the
    order ``os.listdir`` reported the entries.
    """
    loaded = []
    for entry in os.listdir(dirname):
        full_path = path.join(dirname, entry)
        loaded.append(Counters.import_weights(full_path, weights.idf))
    return loaded
def import_tfs(dirname):
    """Load TF weights from every entry found in `dirname`.

    Each name returned by ``os.listdir(dirname)`` is joined with `dirname`
    and handed to ``Counters.import_weights`` together with ``weights.tf``.

    Returns a list of ``(entry_name, loaded_weights)`` tuples, in the order
    ``os.listdir`` reported the entries, so callers can tell which file
    each result came from.
    """
    named_weights = []
    for entry in os.listdir(dirname):
        full_path = path.join(dirname, entry)
        named_weights.append((entry, Counters.import_weights(full_path, weights.tf)))
    return named_weights
# Script: rate every document of the "white" and "black" sets with a RIC
# rater built from the two sets' IDF weights, then write one
# "<file name> <rating>" line per document to ratings.csv inside each
# set's directory.
from os import path
from counters import Counters
from rating import RIC
from loader import Loader
import matplotlib.pyplot as plt
import weights

# Location of the data sets and the names of the two sets being compared.
sets_dir = '/home/rsuvorov/projects/dcfs/data'
white_set = 'en_var_1'
black_set = 'en_porno_1'

white_dir = path.join(sets_dir, white_set)
black_dir = path.join(sets_dir, black_set)

# Per-document TF data comes from the 'tf' subdirectory of each set; the
# set-wide IDF weights come from '<set name>.idf' in the set directory.
white_docs = Loader.import_tfs(path.join(white_dir, 'tf'))
white_idf = Counters.import_weights(path.join(white_dir, white_set + '.idf'), weights.idf)

black_docs = Loader.import_tfs(path.join(black_dir, 'tf'))
black_idf = Counters.import_weights(path.join(black_dir, black_set + '.idf'), weights.idf)

# NOTE(review): RIC is constructed with the black IDF first and the white
# IDF second; its scoring semantics are defined in the rating module.
rater = RIC(black_idf, white_idf)

white_ratings = [(fname, rater(tf)) for (fname, tf) in white_docs]
black_ratings = [(fname, rater(tf)) for (fname, tf) in black_docs]

# `with` closes (and therefore flushes) each output file even if writing
# fails part-way. The '%s %s\n' format produces the same text as the
# original `print >> f, fname, r`, but also runs on Python 3.
with open(path.join(white_dir, "ratings.csv"), 'w') as white_dat:
    for (fname, r) in white_ratings:
        white_dat.write('%s %s\n' % (fname, r))

with open(path.join(black_dir, "ratings.csv"), 'w') as black_dat:
    for (fname, r) in black_ratings:
        black_dat.write('%s %s\n' % (fname, r))