def input(self, filename):
    """Load the parameter file and the three data files it names.

    ``filename`` is a tab-separated key/value file. The values stored under
    the ``abundances``, ``clusters`` and ``centroids`` keys are opened
    relative to ``PyPluMA.prefix()``.

    Populates:
        self.parameters: key -> value (value stripped of whitespace).
        self.abundances: maps the first tab-separated field of each line,
            wrapped in double quotes, to the second field as a float. The
            first line of the file is skipped as a header.
        self.clusters: list of lists. A line equal to ``"","x"`` starts a
            new cluster; every other line adds its second comma-separated
            field (stripped) to the most recently started cluster.
        self.centroids: second comma-separated field (stripped) of every
            line after the header line.

    Raises:
        IndexError: if a line lacks the expected second field, or if the
            cluster file has a member line before any ``"","x"`` line.
        KeyError: if a required key is missing from the parameter file.
    """
    # Parameter file
    self.parameters = dict()
    with open(filename, 'r') as paramfile:
        for line in paramfile:
            contents = line.split('\t')
            self.parameters[contents[0]] = contents[1].strip()

    # Abundances
    self.abundances = dict()
    abundpath = PyPluMA.prefix() + "/" + self.parameters['abundances']
    with open(abundpath, 'r') as abundfile:
        abundfile.readline()  # Header
        for line in abundfile:
            contents = line.split('\t')
            self.abundances["\"" + contents[0] + "\""] = float(contents[1])

    # Clusters
    self.clusters = []
    clusterpath = PyPluMA.prefix() + "/" + self.parameters['clusters']
    with open(clusterpath, 'r') as clusterfile:
        for line in clusterfile:
            if line.strip() == "\"\",\"x\"":
                # Marker line: begin a new (initially empty) cluster.
                self.clusters.append([])
            else:
                contents = line.split(',')
                self.clusters[-1].append(contents[1].strip())

    # Centroids
    self.centroids = []
    centroidpath = PyPluMA.prefix() + "/" + self.parameters['centroids']
    with open(centroidpath, 'r') as centroidfile:
        centroidfile.readline()  # Header
        for line in centroidfile:
            contents = line.split(',')
            self.centroids.append(contents[1].strip())
def input(self, infile):
    """Read the tab-separated parameter file and resolve the two input paths.

    Each line of ``infile`` is stripped and split on tabs; the first field is
    the key and the second the value, stored in ``self.parameters``.
    ``self.infl1`` and ``self.infl2`` are then set to the ``infile1`` and
    ``infile2`` parameter values prefixed with ``PyPluMA.prefix() + "/"``.

    Raises:
        IndexError: if a line has no tab-separated second field.
        KeyError: if ``infile1`` or ``infile2`` is not defined.
    """
    self.parameters = dict()
    # The context manager closes the handle even if a malformed line raises.
    with open(infile, 'r') as inputfile:
        for line in inputfile:
            contents = line.strip().split('\t')
            self.parameters[contents[0]] = contents[1]
    self.infl1 = PyPluMA.prefix() + "/" + self.parameters["infile1"]
    self.infl2 = PyPluMA.prefix() + "/" + self.parameters["infile2"]
def input(self, filename):
    """Parse the tab-separated parameter file into ``self.parameters``.

    The open handle is kept on ``self.txtfile`` (it is not closed here).
    For every line, the first tab-separated field is the key and the second
    field, stripped, is the value. When ``PyPluMA.prefix()`` is non-empty the
    ``csvfile`` parameter is rewritten to ``<prefix>/<csvfile>``.
    """
    self.txtfile = open(filename, 'r')
    self.parameters = dict()
    for entry in self.txtfile:
        fields = entry.split('\t')
        key = fields[0]
        self.parameters[key] = fields[1].strip()

    # Only qualify the CSV path when a prefix is actually configured.
    if len(PyPluMA.prefix()) > 0:
        relative_csv = self.parameters['csvfile']
        self.parameters['csvfile'] = PyPluMA.prefix() + "/" + relative_csv
def run(self):
    """Screen CSV rows by the zero / nonzero status of the target column(s).

    Reads ``self.parameters['csvfile']``; its first line (stripped) is stored
    in ``self.header``. A header cell is a target column when, after removing
    one pair of surrounding double quotes, it either equals
    ``self.parameters['column']`` (quotes likewise removed) or is that name
    followed by ``-`` and something ``is_number`` accepts (the case of more
    than one strain).

    Row selection, driven by ``self.parameters['criteria']``:
        ``"nonzero"``: keep the row when at least one target cell is
            non-numeric or nonzero.
        anything else: keep the row only when every target cell is numeric
            and equal to zero.

    Kept rows (stripped) are collected in ``self.newlines``, each at most
    once. If no target column is found, a warning is logged and
    ``self.newlines`` is left empty.
    """
    self.newlines = []
    column = self.parameters['column']
    criteria = self.parameters['criteria']
    if column.startswith('"'):  # Take out quotes
        column = column[1:len(column) - 1]

    # The context manager guarantees the CSV handle is released on every path.
    with open(self.parameters['csvfile'], 'r') as csvfile:
        # First line has the target column
        self.header = csvfile.readline().strip()

        tocheck = []
        for i, name in enumerate(self.header.split(',')):
            # startswith() instead of [0] so an empty header cell is tolerated.
            if name != '""' and name.startswith('"'):
                name = name[1:len(name) - 1]
            is_exact = (name == column)
            # For cases where you have more than one strain: "<column>-<n>".
            # name != column here, so name[len(column)] is always in range.
            is_strain = (not is_exact
                         and name.startswith(column)
                         and name[len(column)] == '-'
                         and is_number(name[len(column) + 1:]))
            if is_exact or is_strain:
                print(name + "\n")
                tocheck.append(i)

        if len(tocheck) == 0:
            PyPluMA.log("[CSVScreen] WARNING: TARGET COLUMN NOT FOUND")
            return

        for line in csvfile:
            line = line.strip()
            contents = line.split(',')
            if criteria == "nonzero":
                # Only one has to be nonzero (or non-numeric) to keep it.
                # any() stops at the first hit, so the row is appended once
                # even when several target cells qualify.
                keep = any(
                    not is_number(contents[targetindex])
                    or float(contents[targetindex]) != 0
                    for targetindex in tocheck)
            else:
                # All have to be numeric zeros to keep it.
                keep = all(
                    is_number(contents[targetindex])
                    and float(contents[targetindex]) == 0
                    for targetindex in tocheck)
            if keep:
                self.newlines.append(line)
def input(self, filename):
    """Read the correlation and pathway file locations from ``filename``.

    Format expected (tab-separated, one entry per line):
        correlationfile <somefile.csv>
        pathwayfile     <somefile.txt>

    ``self.myfile`` and ``self.mypathways`` are set to the respective values
    prefixed with ``PyPluMA.prefix() + "/"``. Lines whose first field is
    neither key are ignored; an attribute whose key never appears is not set.

    Raises:
        IndexError: if a recognised key has no tab-separated value.
    """
    # The context manager closes the handle even if a malformed line raises.
    with open(filename, 'r') as thefile:
        for line in thefile:
            entries = line.strip().split('\t')
            if entries[0] == 'correlationfile':
                self.myfile = PyPluMA.prefix() + "/" + entries[1]
            elif entries[0] == 'pathwayfile':
                self.mypathways = PyPluMA.prefix() + "/" + entries[1]
def output(self, filename):
    """Write one row per entry of ``self.bacteria`` to the NOA file ``filename``.

    The file starts with the header ``name<TAB>MM``. Each name is stripped,
    has one pair of surrounding double quotes removed if it starts with a
    quote, and is written with ``1`` when it begins with ``x`` and ``0``
    otherwise.

    Raises:
        IndexError: if a name is empty after stripping / unquoting.
    """
    # The context manager flushes and closes the file; the original never
    # closed it, leaving the written data at the mercy of garbage collection.
    with open(filename, 'w') as noafile:
        PyPluMA.log("Writing NOA file ")
        noafile.write("name\tMM\n")
        for bac in self.bacteria:
            bac = bac.strip()
            if bac[0] == '"':
                bac = bac[1:len(bac) - 1]
            flag = "1" if bac[0] == 'x' else "0"
            noafile.write(bac + "\t" + flag + "\n")
def run(self):
    """Drop CSV columns that exceed ``minval`` in too few rows.

    Reads tab-separated parameters from ``self.myfile`` into
    ``self.parameters`` and loads ``<prefix>/<csvfile>``: the header line
    becomes ``self.firstline`` / ``self.bacteria`` (a ``""`` cell is removed
    from the latter), the first field of each row goes to ``self.samples``
    and the remaining fields, as floats, to ``self.ADJ``.

    A column is deleted when the fraction of rows whose value is strictly
    greater than ``minval`` is below ``threshold``. Deletion removes the
    column from ``self.bacteria`` and every row of ``self.ADJ`` and edits
    its name out of ``self.firstline``. ``self.m`` / ``self.n`` hold the row
    count and the final column count.

    Raises:
        ZeroDivisionError: if the CSV has header columns but no data rows.
    """
    self.parameters = dict()
    with open(self.myfile, 'r') as parameterfile:
        for line in parameterfile:
            contents = line.strip().split('\t')
            self.parameters[contents[0]] = contents[1]

    csvpath = PyPluMA.prefix() + "/" + self.parameters["csvfile"]
    with open(csvpath, 'r') as filestuff:
        self.firstline = filestuff.readline().strip()
        lines = list(filestuff)

    self.m = len(lines)
    self.samples = []
    self.bacteria = self.firstline.split(',')
    if self.bacteria.count('""') != 0:
        self.bacteria.remove('""')
    self.n = len(self.bacteria)

    self.ADJ = []
    for row in lines:
        contents = row.split(',')
        self.samples.append(contents[0])
        # contents[0] is the sample name, so values start at index 1.
        self.ADJ.append(
            [float(contents[j + 1].strip()) for j in range(self.n)])

    # Parse the numeric parameters once rather than for every cell.
    minval = float(self.parameters["minval"])
    threshold = float(self.parameters["threshold"])

    j = 0
    while j < len(self.bacteria):
        nonzerocount = sum(
            1 for i in range(self.m) if self.ADJ[i][j] > minval)
        fraction = float(nonzerocount) / float(self.m)
        if fraction < threshold:
            print("DELETING " + self.bacteria[j] + " " + str(fraction))
            # NOTE(review): str.replace removes every occurrence of the name
            # in the header, including inside a longer column name that
            # contains it - confirm column names cannot overlap this way.
            self.firstline = self.firstline.replace(self.bacteria[j], "")
            if self.firstline.find(",,") != -1:
                self.firstline = self.firstline.replace(",,", ",")
            if self.firstline.endswith(','):
                self.firstline = self.firstline[:len(self.firstline) - 1]
            del self.bacteria[j]
            for i in range(self.m):
                del self.ADJ[i][j]
            # j is not advanced: the next column has shifted into slot j.
        else:
            j += 1
    self.n = len(self.bacteria)
def input(self, filename):
    """Open the data files named in the tab-separated file ``filename``.

    Every line is split on tabs into a key and a (stripped) value. The
    ``correlations`` key is mandatory: if it is missing an error is logged
    and the process exits with status 1. ``abundances`` and ``clusters`` are
    optional.

    Sets ``self.correlations``, ``self.abundances`` and ``self.clusters`` to
    file objects opened for reading (left open for later use), or to
    ``None`` for an optional key that is absent. The paths are used exactly
    as written in the file.

    Raises:
        IndexError: if a line has no tab-separated second field.
    """
    networkdata = dict()
    # Only the parameter file is closed here; the data-file handles below
    # are deliberately kept open on the instance.
    with open(filename, 'r') as filestuff:
        for line in filestuff:
            keyval = line.split('\t')
            networkdata[keyval[0]] = keyval[1].strip()

    if 'correlations' not in networkdata:
        PyPluMA.log("Error in CSV2DotPlugin, no correlations file defined")
        sys.exit(1)
    self.correlations = open(networkdata['correlations'], 'r')

    if 'abundances' in networkdata:
        self.abundances = open(networkdata['abundances'], 'r')
    else:
        self.abundances = None

    if 'clusters' in networkdata:
        self.clusters = open(networkdata['clusters'], 'r')
    else:
        self.clusters = None
def output(self, outputfile):
    """Write one CSV per entry of ``self.classify``.

    The header row is ``""`` followed by the quoted entries of
    ``self.firstline`` that remain after removing ``taxlevel``, ``rankID``,
    ``taxon``, ``daughterlevels`` and ``total``.

    Destination of the file for level ``L``:
        * ``<outputfile>.<L>.csv`` normally;
        * ``<prefix>/<L>/<outputfile>.csv`` when ``<prefix>/kingdom`` exists,
          with every occurrence of the prefix first removed from
          ``outputfile``.

    Each data row is the quoted ``self.taxa[i][j]`` followed by the
    tab-separated fields of ``self.lines[i][j]`` from the sixth onward,
    joined with commas. No newline is appended, so the row terminator must
    already be part of the last field (presumably the stored lines are
    unstripped - verify against the code that fills ``self.lines``).

    Raises:
        ValueError: if one of the five fixed column names is not present in
            ``self.firstline`` (e.g. on a second call).
    """
    # NOTE: this is the same list object as self.firstline, so the removals
    # below modify the instance attribute in place (original behaviour).
    samples = self.firstline
    for fixed in ('taxlevel', 'rankID', 'taxon', 'daughterlevels', 'total'):
        samples.remove(fixed)

    newfirstline = "\"\"," + ",".join(
        "\"" + sample + "\"" for sample in samples)
    if samples:
        # The header is newline-terminated only when a sample column exists.
        newfirstline += '\n'

    directoryflag = False
    if os.path.exists(PyPluMA.prefix() + "/kingdom"):
        directoryflag = True
        outputfile = outputfile.replace(PyPluMA.prefix(), '')

    for i, level in enumerate(self.classify):
        if not directoryflag:
            path = outputfile + "." + level + ".csv"
        else:
            path = PyPluMA.prefix() + "/" + level + "/" + outputfile + ".csv"
        # One file per level; the context manager flushes and closes each
        # of them (the original leaked every handle).
        with open(path, 'w') as filestuff:
            filestuff.write(newfirstline)
            for j in range(len(self.taxa[i])):
                contents = self.lines[i][j].split('\t')[5:]
                filestuff.write("\"" + self.taxa[i][j] + "\"" + ","
                                + ",".join(contents))
def output(self, filename):
    """Write the network held on the instance to ``filename`` in Dot syntax.

    Nodes: one per ``self.bacteria[i]`` for ``i < self.n``. When
    ``self.abundances`` is not ``None`` the font size, width and height are
    derived from ``self.ABUND[i]``; otherwise fixed sizes are used.

    Edges: one per pair ``i < j`` with ``self.ADJ[i][j] != 0``, carrying
    ``w`` equal to the value, colour ``red`` for negative values and
    ``green`` otherwise, and ``penwidth`` equal to ten times the value.

    Clusters: when ``self.clusters`` is not ``None``, each entry of
    ``self.CLUST`` becomes a ``subgraph clusterC<k>`` listing its nodes.
    """
    # The context manager flushes and closes the file; the original never
    # closed it.
    with open(filename, 'w') as dotfile:
        PyPluMA.log("Writing Dot file ")
        dotfile.write("graph {\n")
        dotfile.write("forcelabels=true;\n")
        dotfile.write("penwidth=10;\n")

        if self.abundances is not None:
            for i in range(self.n):
                name = self.bacteria[i]
                size = '{:f}'.format(20 + self.ABUND[i] * 10)
                dotfile.write(
                    name + "[label=" + name
                    + ",fontsize=" + str(int(200 + self.ABUND[i] * 100))
                    + ",penwidth=10,width=" + size
                    + ",height=" + size + "];\n")
        else:
            for i in range(self.n):
                name = self.bacteria[i]
                dotfile.write(
                    name + "[label=" + name
                    + ",fontsize=100,penwidth=10,width=10,height=10];\n")

        # Upper triangle only: every unordered pair is emitted once.
        for i in range(self.n):
            for j in range(i + 1, self.n):
                weight = self.ADJ[i][j]
                if weight != 0:
                    color = "red" if weight < 0 else "green"
                    # NOTE(review): penwidth is negative for negative
                    # weights - confirm the renderer accepts that.
                    dotfile.write(
                        self.bacteria[i] + " -- " + self.bacteria[j]
                        + "[w=" + str(weight) + ",color=\"" + color
                        + "\";penwidth=" + str(weight * 10) + "];\n")

        if self.clusters is not None:
            for count, cluster in enumerate(self.CLUST):
                dotfile.write("subgraph clusterC" + str(count) + " { ")
                for node in cluster:
                    dotfile.write(node + "; ")
                dotfile.write("}\n")

        dotfile.write("}\n")
def run(self):
    """Concatenate the columns of two CSV files row by row.

    The first two lines of ``self.myfile`` name the two CSV files, resolved
    against ``PyPluMA.prefix()``. Their header lines are stored in
    ``self.firstline1`` / ``self.firstline2`` and split into
    ``self.bacteria1`` / ``self.bacteria2`` (a ``""`` cell is removed).

    For each data row of the first file, the next row of the second file is
    read; rows are paired by position, and the first field of each row is
    not used. ``self.ADJ`` receives the ``p`` float values of the first row
    followed by the ``q`` float values of the second.

    Sets ``self.p``, ``self.q``, ``self.m`` (row count) and
    ``self.n = p + q``.

    Raises:
        IndexError: if a row has too few fields, including the case where
            the second file has fewer rows than the first.
    """
    with open(self.myfile, 'r') as filestuff:
        path1 = PyPluMA.prefix() + "/" + filestuff.readline().strip()
        path2 = PyPluMA.prefix() + "/" + filestuff.readline().strip()

    with open(path1, 'r') as file1, open(path2, 'r') as file2:
        self.firstline1 = file1.readline().strip()
        self.bacteria1 = self.firstline1.split(',')
        if self.bacteria1.count('""') != 0:
            self.bacteria1.remove('""')

        self.firstline2 = file2.readline().strip()
        self.bacteria2 = self.firstline2.split(',')
        if self.bacteria2.count('""') != 0:
            self.bacteria2.remove('""')

        self.p = len(self.bacteria1)
        self.q = len(self.bacteria2)

        self.ADJ = []
        for line in file1:
            contents = line.split(',')
            contents2 = file2.readline().split(',')
            # Index 0 of each row is its label, so values start at index 1.
            row = [float(contents[j + 1]) for j in range(self.p)]
            row.extend(float(contents2[j + 1]) for j in range(self.q))
            self.ADJ.append(row)

    self.m = len(self.ADJ)
    self.n = self.p + self.q
def run(self):
    """Merge several CSV tables into one.

    ``self.myfile`` lists one CSV path per line (each prefixed with
    ``PyPluMA.prefix()`` when that is non-empty). The first CSV defines the
    initial rows (``self.samples``, from the first field of each row) and
    columns (``self.bacteria``, from the header, with a ``""`` cell
    removed); its header line is stored in ``self.firstline``.

    Every further CSV is folded in cell by cell:
        * a row label not yet in ``self.samples`` adds a new row filled
          with ``0``;
        * a column name not yet in ``self.bacteria`` adds a new column
          filled with ``0.0`` in every row;
        * the cell itself overwrites whatever was stored at that position.

    Cell values are kept as the stripped strings read from the files; only
    the fill values are numbers. ``self.m`` and ``self.n`` track the row and
    column counts. Blank lines in the list file are skipped.
    """
    prefix = PyPluMA.prefix()
    with open(self.myfile, 'r') as filestuff:
        firstpath = filestuff.readline().strip()
        if len(prefix) != 0:
            firstpath = prefix + "/" + firstpath

        # Use first file to get indices
        with open(firstpath, 'r') as firstfile:
            self.firstline = firstfile.readline().strip()
            lines = [line.strip() for line in firstfile]

        self.m = len(lines)
        self.samples = []
        self.bacteria = self.firstline.split(',')
        if self.bacteria.count('""') != 0:
            self.bacteria.remove('""')
        self.n = len(self.bacteria)

        self.ADJ = []
        for rowtext in lines:
            contents = rowtext.split(',')
            self.samples.append(contents[0])
            self.ADJ.append(
                [contents[j + 1].strip() for j in range(self.n)])

        # Remaining lines of the list file: one additional CSV each.
        for line in filestuff:
            myline = line.strip()
            if not myline:
                # A blank (e.g. trailing) line names no file.
                continue
            if len(prefix) != 0:
                myline = prefix + "/" + myline

            with open(myline, 'r') as newfile:
                header = newfile.readline().strip()
                lines = [line2.strip() for line2 in newfile]

            bac = header.split(',')
            if bac.count('""') != 0:
                bac.remove('""')

            for line2 in lines:
                contents = line2.split(',')
                bac2 = contents[0]
                if bac2 in self.samples:
                    x = self.samples.index(bac2)
                else:
                    # Unseen row label: add a zero-filled row.
                    self.ADJ.append([0] * self.n)
                    x = len(self.ADJ) - 1
                    self.m += 1
                    self.samples.append(bac2)

                for j in range(1, len(contents)):
                    # contents[0] is the row label, so cell j belongs to
                    # header column j - 1.
                    colname = bac[j - 1]
                    if colname in self.bacteria:
                        y = self.bacteria.index(colname)
                        self.ADJ[x][y] = contents[j].strip()
                    else:
                        # Unseen column: extend every row, then set the cell.
                        self.bacteria.append(colname)
                        self.n += 1
                        for row in range(len(self.ADJ)):
                            self.ADJ[row].append(0.0)
                        self.ADJ[x][len(self.ADJ[x]) - 1] = contents[j].strip()
def input(self, filename):
    """Load the image sets, build the network described on disk and fit it.

    ``filename`` is a tab-separated key/value parameter file. Keys read
    here: ``classnames``, ``trainset``, ``testset``, ``tensor``, ``dense``,
    ``optimize``, ``metric`` and ``epochs``. All file values are resolved
    against ``PyPluMA.prefix()``.

    * ``classnames``: one class name per line, stored in ``self.class_names``.
    * ``trainset`` / ``testset``: lines of ``<image path>,<class name>``.
      Images are loaded into arrays, stacked and divided by 255.0; labels
      are the index of the class name in ``self.class_names``. The test set
      is kept as ``self.test_images`` / ``self.test_labels`` and its image
      paths as ``self.inputfilenames``.
    * ``tensor``: one convolution layer per line
      (``filters<TAB>kernel rows<TAB>kernel cols<TAB>activation``). Every
      layer after the first is preceded by a 2x2 max-pooling layer.
    * ``dense``: one dense layer per line (``units<TAB>activation``),
      followed by a final dense layer with one unit per class.

    The model is stored in ``self.model``, compiled with the ``optimize``
    and ``metric`` parameters and fitted for ``epochs`` epochs using the
    test set as validation data.
    """
    self.parameters = dict()
    with open(filename, 'r') as paramfile:
        for line in paramfile:
            contents = line.split('\t')
            self.parameters[contents[0]] = contents[1].strip()

    def resolve(key):
        # Path of the file named by parameter `key`, under the prefix.
        return PyPluMA.prefix() + "/" + self.parameters[key]

    def load_image_set(listing_path):
        # Read "<image>,<class>" lines; return (images, labels, image paths).
        image_list = []
        label_list = []
        names = []
        with open(listing_path, 'r') as listing:
            for line in listing:
                contents = line.strip().split(',')
                data = numpy.asarray(
                    im.open(PyPluMA.prefix() + "/" + contents[0]))
                names.append(contents[0])
                print("READING FILE " + contents[0])
                # Wrapped in a list so vstack stacks along a new first axis.
                image_list.append([data])
                label_list.append(
                    numpy.asarray([[self.class_names.index(contents[1])]]))
        return (numpy.vstack(tuple(image_list)),
                numpy.vstack(tuple(label_list)),
                names)

    with open(resolve('classnames'), 'r') as classnames_file:
        self.class_names = [line.strip() for line in classnames_file]

    train_images, train_labels, _ = load_image_set(resolve('trainset'))
    (self.test_images, self.test_labels,
     self.inputfilenames) = load_image_set(resolve('testset'))

    # Normalize pixel values to be between 0 and 1
    train_images = train_images / 255.0
    self.test_images = self.test_images / 255.0

    self.model = models.Sequential()
    with open(resolve('tensor'), 'r') as tensorfile:
        for layer_index, line in enumerate(tensorfile):
            contents = line.strip().split('\t')
            if layer_index == 0:
                height = len(train_images[0])
                width = len(train_images[0][0])
                print(height)
                print(width)
                self.model.add(
                    layers.Conv2D(int(contents[0]),
                                  (int(contents[1]), int(contents[2])),
                                  activation=contents[3],
                                  # Assuming RGB (3)
                                  input_shape=(height, width, 3)))
            else:
                self.model.add(layers.MaxPooling2D((2, 2)))
                self.model.add(
                    layers.Conv2D(int(contents[0]),
                                  (int(contents[1]), int(contents[2])),
                                  activation=contents[3]))

    self.model.summary()
    self.model.add(layers.Flatten())
    with open(resolve('dense')) as densefile:
        for line in densefile:
            contents = line.strip().split('\t')
            self.model.add(
                layers.Dense(int(contents[0]), activation=contents[1]))
    self.model.add(layers.Dense(len(self.class_names)))
    self.model.summary()

    self.model.compile(
        optimizer=self.parameters['optimize'],
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[self.parameters['metric']])

    self.model.fit(train_images,
                   train_labels,
                   epochs=int(self.parameters['epochs']),
                   validation_data=(self.test_images, self.test_labels))
def input(self, filename):
    """Read two relative paths from the first two lines of ``filename``.

    ``self.myfile`` is built from the first line and ``self.mycountfile``
    from the second, each stripped and prefixed with
    ``PyPluMA.prefix() + "/"``. A missing line yields just the prefix and
    slash, because ``readline()`` returns an empty string at end of file.
    """
    # The context manager closes the handle, which the original leaked.
    with open(filename, 'r') as tempstuff:
        self.myfile = PyPluMA.prefix() + "/" + tempstuff.readline().strip()
        self.mycountfile = PyPluMA.prefix() + "/" + tempstuff.readline().strip()