def save_file_vector(file_path, vocab, out_file_path, header):
    """Write the per-pivot vectors of a single input file to a CSV file.

    The output file is (re)created, ``header`` is written as its first row,
    and then one row is written for every pivot position ``mu``.  Each row is
    the result of ``lowbow_single`` for the preprocessed file at that ``mu``.

    Args:
        file_path: Path of the input file handed to ``preprocess_file``.
        vocab: Vocabulary object passed through to ``preprocess_file`` and
            ``lowbow_single``.
        out_file_path: Path of the CSV file to write (truncated if present).
        header: Iterable of values written as the first CSV row.
    """
    log.debug("Saving vector for file {0}".format(file_path))
    # Evenly spaced interior points of (0, 1): k / (pivot_count + 1) for
    # k = 1..pivot_count.  linspace is used rather than arange with a
    # fractional step because arange derives its length from
    # ceil((stop - start) / step), which rounding can push one too high
    # (e.g. pivot_count == 48 would yield an extra mu of ~1.0).
    pivots = np.linspace(0, 1, int(parameters.pivot_count) + 2)[1:-1]
    with open(out_file_path, 'w+') as out_file_handle:
        out_file = csv.writer(out_file_handle)
        out_file.writerow(header)
        data = preprocess_file(file_path, vocab)
        for mu in pivots:
            result = lowbow_single(data, vocab, mu, parameters.c, parameters.sigma)
            out_file.writerow(list(result))
            log.debug("Got vector for mu={0}".format(mu))
def save_dataset_total(data_class, vocab):
    """Write one CSV row per data file, concatenating its per-pivot vectors.

    For every topic in ``parameters.topics`` whose directory exists under
    ``parameters.data_root``, the ``<topic>/<data_class>/<label>/<file>``
    tree is traversed.  Each file is preprocessed, ``lowbow_single`` is
    evaluated at every pivot position ``mu``, and the results are
    concatenated into a single row with the label directory name appended as
    the last column.  Rows are written to
    ``<parameters.out_root>/<data_class>_data.csv``.

    Args:
        data_class: Name of the sub-directory to read under each topic; also
            used to build the output file name and the log messages.
        vocab: Vocabulary object passed through to ``preprocess_file`` and
            ``lowbow_single``.

    Raises:
        OSError: If an existing topic directory has no ``data_class``
            sub-directory, or a label entry is not a listable directory
            (propagated from ``os.listdir``).
    """
    log.debug('total: starting with {0}ing data'.format(data_class))
    csv_out_path = os.path.join(parameters.out_root, '{0}_data.csv'.format(data_class))
    # Only used for the progress messages: counts every file below data_root
    # whose directory path contains data_class.
    file_count = sum(
        len(files)
        for root, _sub_folders, files in os.walk(parameters.data_root)
        if data_class in root
    )
    # Evenly spaced interior points of (0, 1): k / (pivot_count + 1) for
    # k = 1..pivot_count.  linspace is used rather than arange with a
    # fractional step because arange derives its length from
    # ceil((stop - start) / step), which rounding can push one too high
    # (e.g. pivot_count == 48 would yield an extra mu of ~1.0 and therefore
    # a wider row).  The grid is the same for every file, so build it once.
    pivots = np.linspace(0, 1, int(parameters.pivot_count) + 2)[1:-1]
    cnt = 1
    with open(csv_out_path, 'w+') as out_file_handle:
        out_file = csv.writer(out_file_handle)
        for topic in parameters.topics:
            path = os.path.join(parameters.data_root, topic)
            if not os.path.exists(path):
                # Topics without a directory are silently skipped.
                continue
            actual_path = os.path.join(path, data_class)
            for label in os.listdir(actual_path):
                label_path = os.path.join(actual_path, label)
                for file_name in os.listdir(label_path):
                    log.debug("Saving vectors for file {0}, {1}/{2}".format(file_name, cnt, file_count))
                    file_path = os.path.join(label_path, file_name)
                    data = preprocess_file(file_path, vocab)
                    attributes = []
                    for mu in pivots:
                        result = list(lowbow_single(data, vocab, mu, parameters.c, parameters.sigma))
                        attributes.extend(result)
                        log.debug("Got vector for mu={0}".format(mu))
                    # The label directory name is the last column of the row.
                    out_file.writerow(attributes + [label])
                    cnt += 1