def run_extractmzs(sc, fname, data, nrows, ncols):
    """Run the per-group query over a spectra text file and score each result.

    Every line of ``fname`` is parsed with ``txt_to_spectrum``, queried
    against ``data`` with ``get_many_groups_total_dict_individual``, and the
    per-spectrum results are merged with ``reduce_manygroups_dict``.  Each
    element of the merged result is then passed to
    ``blockentropy.get_block_entropy_dict``.

    Args:
        sc: Object exposing ``textFile(path)`` -- presumably a SparkContext;
            confirm against the caller.
        fname: Path handed to ``sc.textFile``.
        data: Query description forwarded unchanged to the per-spectrum
            query helper.
        nrows: Forwarded to ``blockentropy.get_block_entropy_dict``.
        ncols: Forwarded to ``blockentropy.get_block_entropy_dict``.

    Returns:
        A ``(qres, entropies)`` tuple: the merged query result and a list
        holding one block-entropy value per element of ``qres``.
    """
    def query_spectrum(spectrum):
        # Closes over ``data`` so the mapper only needs the spectrum itself.
        return get_many_groups_total_dict_individual(data, spectrum)

    lines = sc.textFile(fname)
    parsed = lines.map(txt_to_spectrum)

    # NOTE: an earlier variant mapped with get_many_groups_total_dict and
    # reduced with join_dicts; the per-individual variant is used instead.
    qres = parsed.map(query_spectrum).reduce(reduce_manygroups_dict)

    entropies = []
    for group_result in qres:
        entropies.append(
            blockentropy.get_block_entropy_dict(group_result, nrows, ncols)
        )

    return (qres, entropies)
def run_fulldataset(sc, fname, data, nrows, ncols):
    """Run the two-level group query over a spectra text file and score it.

    Every line of ``fname`` is parsed with ``txt_to_spectrum``, queried
    against ``data`` with ``get_many_groups2d_total_dict_individual``, and
    the per-spectrum results are merged with
    ``reduce_manygroups2d_dict_individual``.  The merged result is walked as
    a sequence of sequences, and every inner element is passed to
    ``blockentropy.get_block_entropy_dict``.

    Args:
        sc: Object exposing ``textFile(path)`` -- presumably a SparkContext;
            confirm against the caller.
        fname: Path handed to ``sc.textFile``.
        data: Query description forwarded unchanged to the per-spectrum
            query helper.
        nrows: Forwarded to ``blockentropy.get_block_entropy_dict``.
        ncols: Forwarded to ``blockentropy.get_block_entropy_dict``.

    Returns:
        A ``(qres, entropies)`` tuple: the merged query result and a list of
        lists of block-entropy values with the same nesting as ``qres``.
    """
    def query_spectrum(spectrum):
        # Closes over ``data`` so the mapper only needs the spectrum itself.
        return get_many_groups2d_total_dict_individual(data, spectrum)

    spectra = sc.textFile(fname).map(txt_to_spectrum)
    per_spectrum = spectra.map(query_spectrum)
    qres = per_spectrum.reduce(reduce_manygroups2d_dict_individual)

    entropies = []
    for res in qres:
        # One inner list of entropies per outer element of the result.
        inner_entropies = []
        for block in res:
            inner_entropies.append(
                blockentropy.get_block_entropy_dict(block, nrows, ncols)
            )
        entropies.append(inner_entropies)

    return (qres, entropies)