def appendInfo(self, inputFname=None, db_vervet=None, outputFname=None,
        inversePCValue=True):
    """
    Annotate each sample row of inputFname with individual metadata and
    write the result to outputFname as a tab-delimited file.

    The first row of inputFname (read through MatrixFile) is the header.
    Every following row is truncated to the header width; its first cell
    is the sample ID and the remaining cells become the PC1..PCn columns.
    The sample ID is resolved through
    db_vervet.parseAlignmentReadGroup(sampleID).individualAlignment, and
    the sex/site/location/taxonomy/depth columns are filled from the
    resulting individual and alignment records. The scientific name is
    looked up through self.db_taxonomy.

    Arguments:
        inputFname: path of the matrix file to read.
        db_vervet: database handle providing parseAlignmentReadGroup().
        outputFname: path of the tab-delimited file to (over)write.
        inversePCValue: if true, the PC cells are converted to float and
            negated before output; otherwise they are copied verbatim
            (still as strings).

    History:
        2012.9.25 skip samples whose individual_alignment entry could not
            be parsed.
        2012.9.5
    """
    sys.stderr.write("Appending info to %s ..."%(inputFname))
    reader = MatrixFile(inputFname)
    try:
        header = reader.next()

        newHeader = ['individualID']
        # header[0] is the sample-ID column, so PC numbering starts at 1.
        newHeader.extend('PC%s'%(i) for i in range(1, len(header)))
        newHeader.extend(['sex|string', 'country|string', 'site-id',
            'site-name|string', 'latitude', 'longitude', 'ucla_id|string',
            'tax_id|string',
            'species|string', 'collectionYear', 'medianDepth'])

        counter = 0
        # "with" guarantees the output is flushed and closed even if a row
        # raises part-way through.
        with open(outputFname, 'w') as outputFile:
            writer = csv.writer(outputFile, delimiter='\t')
            writer.writerow(newHeader)
            for row in reader:
                row = row[:len(header)]	#don't take extra columns
                sampleID = row[0]
                individualAlignment = db_vervet.parseAlignmentReadGroup(
                    sampleID).individualAlignment
                if individualAlignment is None:
                    # 2012.9.25 the sample ID does not start with an
                    # alignment ID (probably "ref", but could be something
                    # else): skip it.
                    sys.stderr.write("Warning: sampleID %s is not parsable to get alignment out of it. Skip.\n"%(sampleID))
                    continue

                individual = individualAlignment.individual_sequence.individual
                data_row = ['%s_%s'%(individual.code, individualAlignment.id)]

                floatValue_row = row[1:]
                if inversePCValue:
                    # Build a real list first so this does not depend on
                    # map() returning a list; keep the numpy array so the
                    # written values stay numpy floats as before.
                    floatValue_row = -numpy.array(
                        [float(value) for value in floatValue_row])
                data_row.extend(list(floatValue_row))

                scientifcName = self.db_taxonomy.returnScientificNameGivenTaxID(
                    individual.tax_id)
                if scientifcName is None:
                    scientifcName = ""
                if individual.collection_date:
                    collectionYear = individual.collection_date.year
                else:
                    collectionYear = ''

                data_row.extend([individual.sex, individual.site.country.name,
                    individual.site.id, individual.site.short_name,
                    individual.latitude, individual.longitude,
                    individual.ucla_id,
                    individual.tax_id, scientifcName, collectionYear,
                    individualAlignment.median_depth])
                writer.writerow(data_row)
                counter += 1
    finally:
        # Release the input handle on success and on failure alike.
        reader.close()
    sys.stderr.write("%s rows outputted.\n"%(counter))
def readInDataToPlot(self, input_fname, sampling_probability=1.0):
    """
    Load a matrix file into self.list_2d and refresh the view and plot.

    The first row read through MatrixFile becomes self.column_header, with
    each label prefixed by its column index. The first two columns are
    treated as strings (and flagged editable); all remaining columns are
    converted to float. If a row's conversion fails, the error is reported
    on stderr and the row is kept with its unconverted cells.

    After loading, this calls self.setupColumns(self.treeview_matrix),
    sets the title of self.app1 to the file's base name, pushes the full
    path onto self.app1_appbar1 and calls self.plotXY(...).

    Arguments:
        input_fname: path of the file to read.
        sampling_probability: each data row is kept when
            random.random() <= sampling_probability. Values outside
            [0, 1] are reset to 1.0. Sub-sampling only happens for values
            strictly between 0 and 1, so 0 keeps every row, same as 1.

    History:
        2015.01.23 added argument sampling_probability to sub-sample data
        2013.07.11 use MatrixFile to read in the file
        2009-5-20 add the column index into the column header for easy
            picking
        2009-3-13 wrap the float conversion part into try...except to
            report what goes wrong
        2009-3-13
    """
    if sampling_probability>1 or sampling_probability<0:
        sampling_probability = 1.0
    do_sampling = 0 < sampling_probability < 1

    reader = MatrixFile(inputFname=input_fname)
    try:
        self.column_header = reader.next()
        for i, label in enumerate(self.column_header):
            self.column_header[i] = '%s %s'%(i, label)
        no_of_cols = len(self.column_header)
        self.column_types = [str]*2 + [float]*(no_of_cols-2)
        self.column_editable_flag_ls = [True, True] + [False]*(no_of_cols-2)
        self.list_2d = []
        for row in reader:
            if do_sampling and random.random()>sampling_probability:
                continue	#skip
            float_part = row[2:]
            try:
                # A list comprehension converts eagerly, so a bad cell
                # raises here, inside the try, and the result can be
                # concatenated with row[:2] below.
                float_part = [float(value) for value in float_part]
            except (TypeError, ValueError):
                # Best effort: report the problem and keep the raw cells.
                sys.stderr.write('Except type: %s\n'%repr(sys.exc_info()))
                traceback.print_exc()
            new_row = row[:2]+float_part
            self.list_2d.append(new_row)
    finally:
        # Close the input even if reading or conversion blows up.
        reader.close()

    self.setupColumns(self.treeview_matrix)
    #update status to reflect the input filename
    self.app1.set_title(os.path.basename(input_fname))
    self.app1_appbar1.push(input_fname)
    self.plotXY(self.ax, self.canvas, self.liststore, self.plot_title)