def process(args, raw_hud_lines):
    """
    Rank the loci of a .hud file by correlation with one principal component.

    Every column of the decoded data matrix is correlated (via mycorr)
    with the requested scaled eigenvector.  Loci are listed from the
    largest ranking key to the smallest; the key is the correlation itself
    or, when args.rank_squared is set, its square.  The value written next
    to each locus is always the signed correlation.
    @param args: user options from the web or cmdline; this function reads
    diploid_and_biallelic, axis (1-based), rank_squared and locus_from_1
    @param raw_hud_lines: raw lines of a .hud file
    @return: results in convenient text form, one tab separated
    "locus index, correlation" line per locus
    @raise ValueError: if the requested axis is not available
    """
    _names, data = hud.decode(raw_hud_lines)
    C_full = np.array(data, dtype=float)
    pcs = eigenpop.get_scaled_eigenvectors(C_full, args.diploid_and_biallelic)
    axis_index = args.axis - 1
    # Reject axes below 1 as well as axes past the end: a negative index
    # would otherwise wrap around and silently pick a PC from the end.
    if axis_index < 0 or axis_index >= len(pcs):
        msg = 'the requested axis is not available'
        raise ValueError(msg)
    # compute the correlation of each SNP vector with the requested PC
    pc = pcs[axis_index]
    corrs = [mycorr(snp, pc) for snp in C_full.T]
    # derive the squares from the correlations instead of recomputing them
    if args.rank_squared:
        keys = [corr ** 2 for corr in corrs]
    else:
        keys = corrs
    # Sort (key, locus) pairs ascending and then reverse, so that ties on
    # the key are listed by descending locus index.
    ranked = sorted((key, i) for i, key in enumerate(keys))
    ranked.reverse()
    # loci are reported 1-based only on request
    offset = 1 if args.locus_from_1 else 0
    lines = []
    for _key, i in ranked:
        lines.append('\t'.join((str(i + offset), str(corrs[i]))))
    return '\n'.join(lines) + '\n'
def process(args, raw_hud_lines):
    """
    Tabulate the leading principal component coordinates of each OTU.

    The output is a tab separated table whose header is 'otu' followed by
    'pc1' .. 'pcN' where N is args.npcs, followed by one row per name
    decoded from the .hud file.
    @param args: user options from the web or cmdline; this function reads
    clean_isolates, diploid_and_biallelic, npcs and add_indices
    @param raw_hud_lines: raw lines of a .hud file
    @return: results in convenient text form
    @raise ValueError: if more principal components are requested than
    are available
    """
    out = StringIO()
    names, data = hud.decode(raw_hud_lines)
    # normalize the names of the isolates
    if args.clean_isolates:
        names = [Carbone.clean_isolate_element(x) for x in names]
    # get the pcs
    C_full = np.array(data, dtype=float)
    pcs = eigenpop.get_scaled_eigenvectors(C_full, args.diploid_and_biallelic)
    # check for sufficient number of eigenvectors
    if len(pcs) < args.npcs:
        raise ValueError(
                'the number of requested principal components '
                'must be no more than the number of OTUs')
    # create the R frame
    # NOTE(review): the header gets no label for the optional index column;
    # presumably this relies on R treating a short header as a sign that
    # the first column holds row names -- confirm before changing.
    headers = ['otu'] + ['pc%d' % (i+1) for i in range(args.npcs)]
    # write() rather than the Python-2-only "print >> out" statement, so
    # the same text is produced under either major Python version
    out.write('\t'.join(headers) + '\n')
    for i, name in enumerate(names):
        typed_row = [name] + [pcs[j][i] for j in range(args.npcs)]
        if args.add_indices:
            typed_row = [i+1] + typed_row
        out.write('\t'.join(str(x) for x in typed_row) + '\n')
    return out.getvalue()
def process(args, raw_hud_lines):
    """
    Write an R-style table of principal component coordinates per OTU.

    The first line holds the column labels 'otu', 'pc1', ..., one 'pc'
    label per requested component (args.npcs).  Each following line holds
    one decoded name and its coordinate on each requested component,
    optionally preceded by a 1-based row index.
    @param args: user options from the web or cmdline; this function reads
    clean_isolates, diploid_and_biallelic, npcs and add_indices
    @param raw_hud_lines: raw lines of a .hud file
    @return: results in convenient text form
    @raise ValueError: if more principal components are requested than
    are available
    """
    out = StringIO()
    names, data = hud.decode(raw_hud_lines)
    # normalize the names of the isolates
    if args.clean_isolates:
        names = [Carbone.clean_isolate_element(x) for x in names]
    # get the pcs
    C_full = np.array(data, dtype=float)
    pcs = eigenpop.get_scaled_eigenvectors(C_full, args.diploid_and_biallelic)
    # check for sufficient number of eigenvectors
    if len(pcs) < args.npcs:
        msg_a = 'the number of requested principal components '
        msg_b = 'must be no more than the number of OTUs'
        raise ValueError(msg_a + msg_b)
    # create the R frame
    component_indices = range(args.npcs)
    # NOTE(review): no header label is added for the optional index column;
    # this looks like the R convention where a header one field shorter
    # than the rows marks the first column as row names -- confirm.
    table_lines = ['\t'.join(['otu'] + ['pc%d' % (j + 1) for j in component_indices])]
    for i, name in enumerate(names):
        typed_row = [name] + [pcs[j][i] for j in component_indices]
        if args.add_indices:
            typed_row = [i + 1] + typed_row
        table_lines.append('\t'.join([str(x) for x in typed_row]))
    # The Python-2-only "print >> out" statements are replaced by explicit
    # writes; each line is still terminated by a single newline.
    for line in table_lines:
        out.write(line + '\n')
    return out.getvalue()
def process(args, raw_hud_lines):
    """
    Show the correlation of every SNP vector with each leading PC.

    One row is produced per column of the decoded data matrix and one
    column per requested principal component (the first args.ncoords
    scaled eigenvectors).  The matrix is rendered with numpy's array
    printing using a wide line width and a high summarization threshold.
    @param args: user options from the web or cmdline; this function reads
    diploid_and_biallelic and ncoords
    @param raw_hud_lines: raw lines of a .hud file
    @return: results in convenient text form
    @raise ValueError: if more principal components are requested than
    are available
    """
    _names, data = hud.decode(raw_hud_lines)
    C_full = np.array(data, dtype=float)
    pcs = eigenpop.get_scaled_eigenvectors(C_full, args.diploid_and_biallelic)
    # check for sufficient number of eigenvectors
    if len(pcs) < args.ncoords:
        raise ValueError('the number of requested principal components '
                'must be no more than the number of OTUs')
    # compute the correlation of each SNP vector with each principal PC
    leading_pcs = pcs[:args.ncoords]
    mylist = []
    for snp in C_full.T:
        mylist.append([mycorr(snp, pc) for pc in leading_pcs])
    # The print options are process-wide numpy state, so put the two
    # settings changed here back afterwards instead of leaking them into
    # whatever code formats arrays next.
    saved = np.get_printoptions()
    np.set_printoptions(linewidth=300, threshold=10000)
    try:
        return str(np.array(mylist))
    finally:
        np.set_printoptions(
                linewidth=saved['linewidth'], threshold=saved['threshold'])
def process(args, raw_hud_lines):
    """
    Build the text of the SNP-by-PC correlation matrix.

    Each column of the decoded data matrix is correlated (via mycorr)
    with each of the first args.ncoords scaled eigenvectors, and the
    resulting 2D array is converted to text by numpy.
    @param args: user options from the web or cmdline; this function reads
    diploid_and_biallelic and ncoords
    @param raw_hud_lines: raw lines of a .hud file
    @return: results in convenient text form
    @raise ValueError: if more principal components are requested than
    are available
    """
    _names, data = hud.decode(raw_hud_lines)
    C_full = np.array(data, dtype=float)
    pcs = eigenpop.get_scaled_eigenvectors(C_full, args.diploid_and_biallelic)
    # check for sufficient number of eigenvectors
    if len(pcs) < args.ncoords:
        raise ValueError(
                'the number of requested principal components '
                'must be no more than the number of OTUs')
    # compute the correlation of each SNP vector with each principal PC
    requested_pcs = pcs[:args.ncoords]
    correlations = np.array([
        [mycorr(snp, pc) for pc in requested_pcs]
        for snp in C_full.T])
    # numpy print options are global; remember the values overridden
    # here and restore them even if the conversion to text fails, so this
    # call does not change how other code prints arrays.
    previous = np.get_printoptions()
    np.set_printoptions(linewidth=300, threshold=10000)
    try:
        text = str(correlations)
    finally:
        np.set_printoptions(
                linewidth=previous['linewidth'],
                threshold=previous['threshold'])
    return text