Example #1
0
   def input(self,filename):
      """Load the plugin parameters and the data files they point to.

      ``filename`` is a tab-separated parameter file (``key<TAB>value`` per
      line) that must define ``abundances``, ``clusters`` and ``centroids``.
      Each of those values is a path relative to ``PyPluMA.prefix()``.

      Populates:
        self.parameters -- dict of parameter name -> value
        self.abundances -- dict of double-quoted name -> float, read from
                           the tab-separated abundance file (first line
                           skipped)
        self.clusters   -- list of clusters; each cluster is the list of
                           the second comma-separated field of its lines
        self.centroids  -- list of the second comma-separated field of
                           every centroid line (first line skipped)

      Raises IndexError if a line has fewer fields than expected, or if the
      cluster file has a member line before the first cluster marker.
      """
      # Parameter file
      self.parameters = dict()
      with open(filename, 'r') as paramfile:
         for line in paramfile:
            contents = line.split('\t')
            self.parameters[contents[0]] = contents[1].strip()

      # Abundances
      self.abundances = dict()
      with open(PyPluMA.prefix()+"/"+self.parameters['abundances'], 'r') as abundfile:
         abundfile.readline() # Skip the header line
         for line in abundfile:
            contents = line.split('\t')
            # Keys are wrapped in double quotes, values are numeric.
            self.abundances["\""+contents[0]+"\""] = float(contents[1])

      # Clusters
      self.clusters = []
      with open(PyPluMA.prefix()+"/"+self.parameters['clusters'], 'r') as clusterfile:
         for line in clusterfile:
            if (line.strip() == "\"\",\"x\""):
               # A line of exactly "","x" starts a new cluster.
               self.clusters.append([])
            else:
               contents = line.split(',')
               # Every other line belongs to the most recently started cluster.
               self.clusters[-1].append(contents[1].strip())

      # Centroids
      self.centroids = []
      with open(PyPluMA.prefix()+"/"+self.parameters['centroids'], 'r') as centroidfile:
         centroidfile.readline() # Skip the header line
         for line in centroidfile:
            contents = line.split(',')
            self.centroids.append(contents[1].strip())
Example #2
0
 def input(self, infile):
     """Read the tab-separated parameter file and resolve both input paths.

     Every line of ``infile`` is ``key<TAB>value``; the pairs are stored in
     ``self.parameters``.  The ``infile1`` and ``infile2`` values are then
     prefixed with ``PyPluMA.prefix()`` and stored as ``self.infl1`` and
     ``self.infl2``.

     Raises IndexError for a line without a tab and KeyError when
     ``infile1`` or ``infile2`` is missing.
     """
     self.parameters = dict()
     # The context manager closes the parameter file once it has been read.
     with open(infile, 'r') as inputfile:
         for line in inputfile:
             contents = line.strip().split('\t')
             self.parameters[contents[0]] = contents[1]
     self.infl1 = PyPluMA.prefix() + "/" + self.parameters["infile1"]
     self.infl2 = PyPluMA.prefix() + "/" + self.parameters["infile2"]
 def input(self, filename):
     """Parse ``key<TAB>value`` lines of ``filename`` into ``self.parameters``.

     The opened file object is kept on ``self.txtfile``.  When
     ``PyPluMA.prefix()`` is non-empty, the ``csvfile`` parameter is
     rewritten to ``<prefix>/<csvfile>``.
     """
     self.txtfile = open(filename, 'r')
     self.parameters = {}
     for entry in self.txtfile:
         fields = entry.split('\t')
         name, value = fields[0], fields[1].strip()
         self.parameters[name] = value
     if len(PyPluMA.prefix()) > 0:
         self.parameters['csvfile'] = "/".join(
             (PyPluMA.prefix(), self.parameters['csvfile']))
 def run(self):
     """Filter the rows of the CSV file by the value(s) in the target column.

     Reads ``self.parameters['column']``, ``['csvfile']`` and
     ``['criteria']``.  A header field is a target when, after removing
     surrounding double quotes, it equals the column name or has the form
     ``<column>-<number>`` (more than one strain of the same column).

     With criteria ``"nonzero"`` a row is kept when at least one target
     cell is not a number (as judged by ``is_number``) or is non-zero.
     With any other criteria a row is kept only when every target cell is
     a number equal to zero.

     Sets ``self.header`` (the stripped header line) and ``self.newlines``
     (the kept rows, stripped, each at most once).  When no target column
     is found a warning is logged through ``PyPluMA.log`` and
     ``self.newlines`` stays empty.
     """
     self.newlines = []
     column = self.parameters['column']
     with open(self.parameters['csvfile'], 'r') as csvfile:
         criteria = self.parameters['criteria']
         # First line has the target column
         self.header = csvfile.readline().strip()
         headercontents = self.header.split(',')
         tocheck = []
         if column.startswith('"'):  # Take out quotes
             column = column[1:-1]
         for i, name in enumerate(headercontents):
             # startswith() rather than name[0] so that an empty header
             # field (e.g. "a,,b") does not raise IndexError.
             if name != '""' and name.startswith('"'):
                 name = name[1:-1]
                 headercontents[i] = name
             # Exact match, or "<column>-<number>" for cases where you
             # have more than one strain.  The index is safe: a name that
             # starts with `column` but differs from it is strictly longer.
             if name == column or (name.startswith(column)
                                   and name[len(column)] == '-'
                                   and is_number(name[len(column) + 1:])):
                 print(name + "\n")
                 tocheck.append(i)

         if not tocheck:
             PyPluMA.log("[CSVScreen] WARNING: TARGET COLUMN NOT FOUND")
         else:
             for line in csvfile:
                 line = line.strip()
                 contents = line.split(',')
                 if criteria == "nonzero":
                     # Only one has to be nonzero to keep it.  any() stops
                     # at the first hit, so a row is appended once even
                     # when several target cells qualify.
                     keep = any(
                         not is_number(contents[targetindex])
                         or float(contents[targetindex]) != 0
                         for targetindex in tocheck)
                 else:
                     # All have to be zero to keep it
                     keep = all(
                         is_number(contents[targetindex])
                         and float(contents[targetindex]) == 0
                         for targetindex in tocheck)
                 if keep:
                     self.newlines.append(line)
 def input(self, filename):
     """Read the correlation and pathway file locations from ``filename``.

     Format expected (tab-separated, one entry per line):
         correlationfile <somefile.csv>
         pathwayfile <somefile.txt>

     Sets ``self.myfile`` and ``self.mypathways`` to the given names
     prefixed with ``PyPluMA.prefix()``.  Lines with any other key are
     ignored; an attribute is only set when its key is present.
     """
     # The context manager closes the parameter file once it has been read.
     with open(filename, 'r') as thefile:
         for line in thefile:
             entries = line.strip().split('\t')
             if entries[0] == 'correlationfile':
                 self.myfile = PyPluMA.prefix() + "/" + entries[1]
             elif entries[0] == 'pathwayfile':
                 self.mypathways = PyPluMA.prefix() + "/" + entries[1]
Example #6
0
    def output(self, filename):
        """Write one ``name<TAB>flag`` row per entry of ``self.bacteria``.

        The file starts with the header ``name<TAB>MM``.  Each name is
        stripped of whitespace and, if it starts with a double quote, of
        its first and last character.  The flag is ``1`` when the
        resulting name starts with ``x`` and ``0`` otherwise (an empty
        name therefore gets ``0``).
        """
        # The context manager guarantees the rows are flushed and the file
        # is closed even if a write fails.
        with open(filename, 'w') as noafile:
            PyPluMA.log("Writing NOA file ")

            noafile.write("name\tMM\n")
            for bac in self.bacteria:
                bac = bac.strip()
                # startswith() instead of bac[0] so that an empty name does
                # not raise IndexError.
                if bac.startswith('"'):
                    bac = bac[1:-1]
                flag = "1" if bac.startswith('x') else "0"
                noafile.write(bac + "\t" + flag + "\n")
    def run(self):
        """Load a CSV matrix and drop the columns that are too sparse.

        ``self.myfile`` is a tab-separated parameter file that must define
        ``csvfile`` (path relative to ``PyPluMA.prefix()``), ``minval`` and
        ``threshold``.  The CSV has a header of column names (an optional
        ``""`` corner cell is ignored) and rows of
        ``sample,value,value,...``.

        A column is deleted when the fraction of rows whose value is
        greater than ``minval`` is below ``threshold``.

        Sets ``self.parameters``, ``self.firstline`` (header text),
        ``self.bacteria`` (remaining column names), ``self.samples`` (row
        names), ``self.ADJ`` (rows of floats) and ``self.m`` / ``self.n``
        (row / column counts).

        Raises ZeroDivisionError when the CSV has columns but no data rows.
        """
        self.parameters = dict()
        with open(self.myfile, 'r') as parameterfile:
            for line in parameterfile:
                contents = line.strip().split('\t')
                self.parameters[contents[0]] = contents[1]

        with open(PyPluMA.prefix() + "/" + self.parameters["csvfile"],
                  'r') as filestuff:
            self.firstline = filestuff.readline().strip()
            lines = list(filestuff)

        self.m = len(lines)
        self.samples = []
        self.bacteria = self.firstline.split(',')
        # Remember whether the header had a "" corner cell so it can be
        # restored if the header has to be rebuilt below.
        has_corner = '""' in self.bacteria
        if has_corner:
            self.bacteria.remove('""')
        self.n = len(self.bacteria)
        self.ADJ = []
        for line in lines:
            contents = line.split(',')
            self.samples.append(contents[0])
            # Column j of the matrix is field j + 1 (field 0 is the sample).
            self.ADJ.append(
                [float(contents[j + 1].strip()) for j in range(self.n)])

        minval = float(self.parameters["minval"])
        threshold = float(self.parameters["threshold"])
        kept = []
        for j, name in enumerate(self.bacteria):
            nonzerocount = sum(1 for row in self.ADJ if row[j] > minval)
            fraction = float(nonzerocount) / float(self.m)
            if fraction < threshold:
                print("DELETING " + name + " " + str(fraction))
            else:
                kept.append(j)

        if len(kept) != len(self.bacteria):
            self.bacteria = [self.bacteria[j] for j in kept]
            self.ADJ = [[row[j] for j in kept] for row in self.ADJ]
            # Rebuild the header from the surviving names.  Editing the
            # header text with str.replace() would also hit every other
            # name that contains the deleted one.
            header = (['""'] if has_corner else []) + self.bacteria
            self.firstline = ",".join(header)
        self.n = len(self.bacteria)
 def input(self, filename):
    """Open the data files named in the tab-separated file ``filename``.

    Each line is ``key<TAB>path``.  ``correlations`` is required; when it
    is missing an error is logged through ``PyPluMA.log`` and the process
    exits with status 1.  ``abundances`` and ``clusters`` are optional.

    Sets ``self.correlations``, ``self.abundances`` and ``self.clusters``
    to file objects opened for reading, or to ``None`` for an optional
    entry that is not listed.  These handles are left open on purpose:
    they are stored on the instance for later use.
    """
    networkdata = dict()
    # Only the parameter file itself is closed here.
    with open(filename, 'r') as filestuff:
       for line in filestuff:
          keyval = line.split('\t')
          networkdata[keyval[0]] = keyval[1].strip()

    if 'correlations' not in networkdata:
       PyPluMA.log("Error in CSV2DotPlugin, no correlations file defined")
       sys.exit(1)
    self.correlations = open(networkdata['correlations'], 'r')

    if 'abundances' in networkdata:
       self.abundances = open(networkdata['abundances'], 'r')
    else:
       self.abundances = None

    if 'clusters' in networkdata:
       self.clusters = open(networkdata['clusters'], 'r')
    else:
       self.clusters = None
Example #9
0
    def output(self, outputfile):
        """Write one CSV file per entry of ``self.classify``.

        The header is ``""`` followed by the double-quoted sample names,
        i.e. ``self.firstline`` without the ``taxlevel``, ``rankID``,
        ``taxon``, ``daughterlevels`` and ``total`` columns.  Each row is
        the quoted taxon name from ``self.taxa[i][j]`` followed by the
        tab-separated fields of ``self.lines[i][j]`` from the sixth field
        on, joined with commas.

        If ``<prefix>/kingdom`` exists, files go to
        ``<prefix>/<level>/<outputfile>.csv`` (with the prefix removed from
        ``outputfile``); otherwise to ``<outputfile>.<level>.csv``.

        Raises ValueError if one of the five bookkeeping columns is missing
        from ``self.firstline``.
        """
        # Work on a copy: removing from self.firstline itself would make a
        # second call fail with ValueError.
        samples = list(self.firstline)
        for column in ('taxlevel', 'rankID', 'taxon', 'daughterlevels',
                       'total'):
            samples.remove(column)
        # Always newline-terminated, even when there are no samples.
        newfirstline = "\"\"," + ",".join(
            "\"" + sample + "\"" for sample in samples) + "\n"

        directoryflag = False
        if (os.path.exists(PyPluMA.prefix() + "/kingdom")):
            directoryflag = True
            outputfile = outputfile.replace(PyPluMA.prefix(), '')

        for i in range(len(self.classify)):
            if (not directoryflag):
                path = outputfile + "." + self.classify[i] + ".csv"
            else:
                path = (PyPluMA.prefix() + "/" + self.classify[i] + "/" +
                        outputfile + ".csv")
            # Close (and flush) each file before the next one is opened.
            with open(path, 'w') as filestuff:
                filestuff.write(newfirstline)
                for j in range(len(self.taxa[i])):
                    filestuff.write("\"" + self.taxa[i][j] + "\"" + ",")
                    # Skip the five bookkeeping columns of the input row.
                    contents = self.lines[i][j].split('\t')[5:]
                    filestuff.write(",".join(contents))
Example #10
0
   def output(self, filename):
      """Write the network to ``filename`` in Graphviz Dot format.

      Nodes come from ``self.bacteria``; when ``self.abundances`` is not
      None their font size, width and height are scaled by ``self.ABUND``.
      An edge is written for every pair ``i < j`` with a non-zero
      ``self.ADJ[i][j]``: red for negative weights, green otherwise.  When
      ``self.clusters`` is not None, each entry of ``self.CLUST`` becomes a
      ``subgraph clusterC<k>`` block.
      """
      # The context manager guarantees the file is flushed and closed.
      with open(filename, 'w') as dotfile:
         PyPluMA.log("Writing Dot file ")

         dotfile.write("graph {\n")
         dotfile.write("forcelabels=true;\n")
         dotfile.write("penwidth=10;\n")
         if (self.abundances is not None):
            for i in range(self.n):
               name = self.bacteria[i]
               # Width and height share the same abundance-scaled value.
               size = '{:f}'.format(20+self.ABUND[i]*10)
               dotfile.write(name+"[label="+name+",fontsize="+str(int(200+self.ABUND[i]*100))+",penwidth=10,width="+size+",height="+size+"];\n")
         else:
            for i in range(self.n):
               name = self.bacteria[i]
               dotfile.write(name+"[label="+name+",fontsize=100,penwidth=10,width=10,height=10];\n")

         # Only the upper triangle is written, so each edge appears once.
         for i in range(self.n):
            for j in range(i+1, self.n):
               weight = self.ADJ[i][j]
               if (weight != 0):
                  color = "red" if weight < 0 else "green"
                  # NOTE(review): penwidth is negative for negative weights;
                  # confirm whether abs(weight) was intended.
                  dotfile.write(self.bacteria[i]+" -- "+self.bacteria[j]+"[w="+str(weight)+",color=\""+color+"\";penwidth="+str(weight*10)+"];\n")

         if (self.clusters is not None):
            for count, cluster in enumerate(self.CLUST):
               dotfile.write("subgraph clusterC"+str(count)+" { ")
               for node in cluster:
                  dotfile.write(node+"; ")
               dotfile.write("}\n")

         # Close the graph whether or not clusters were written; without
         # this the file is not valid Dot when self.clusters is None.
         dotfile.write("}\n")
Example #11
0
    def run(self):
        """Join two CSV matrices side by side, row by row.

        ``self.myfile`` lists two CSV file names (one per line, relative to
        ``PyPluMA.prefix()``).  Each CSV has a header of column names (an
        optional ``""`` corner cell is ignored) and rows of
        ``name,value,value,...``.  Row ``i`` of ``self.ADJ`` is the values
        of row ``i`` of the first file followed by those of row ``i`` of
        the second file, all converted to float.

        Sets ``self.firstline1/2``, ``self.bacteria1/2``, ``self.p`` and
        ``self.q`` (column counts), ``self.ADJ``, ``self.m`` (row count)
        and ``self.n`` (``p + q``).

        Raises IndexError when a row is too short, which includes the
        second file having fewer rows than the first (with ``q > 0``).
        """
        with open(self.myfile, 'r') as filestuff:
            path1 = PyPluMA.prefix() + "/" + filestuff.readline().strip()
            path2 = PyPluMA.prefix() + "/" + filestuff.readline().strip()

        # Both inputs are closed as soon as the matrix has been built.
        with open(path1, 'r') as file1, open(path2, 'r') as file2:
            self.firstline1 = file1.readline().strip()
            self.bacteria1 = self.firstline1.split(',')
            if (self.bacteria1.count('\"\"') != 0):
                self.bacteria1.remove('\"\"')

            self.firstline2 = file2.readline().strip()
            self.bacteria2 = self.firstline2.split(',')
            if (self.bacteria2.count('\"\"') != 0):
                self.bacteria2.remove('\"\"')

            self.p = len(self.bacteria1)
            self.q = len(self.bacteria2)

            self.ADJ = []
            for line in file1:
                contents = line.split(',')
                # Field 0 is the row name; values start at field 1.
                row = [float(contents[j + 1]) for j in range(self.p)]
                # The rows of the two files are paired by position.
                contents2 = file2.readline().split(',')
                row.extend(float(contents2[j + 1]) for j in range(self.q))
                self.ADJ.append(row)
        self.m = len(self.ADJ)
        self.n = self.p + self.q
    def run(self):
        """Merge several CSV matrices into one, matching rows and columns by name.

        ``self.myfile`` lists one CSV file name per line; each name is
        prefixed with ``PyPluMA.prefix()`` when that prefix is non-empty.
        Blank lines in the list are skipped.  Each CSV has a header of
        column names (an optional ``""`` corner cell is ignored) and rows
        of ``name,value,value,...``.

        The first file defines the initial rows and columns.  For every
        further file, a row or column name that is not known yet is added,
        and each cell of the file overwrites the matching cell of the
        merged matrix.

        Sets ``self.firstline`` (header of the first file only),
        ``self.samples`` (row names), ``self.bacteria`` (column names),
        ``self.ADJ`` and ``self.m`` / ``self.n`` (row / column counts).
        Cells read from a file are kept as stripped strings; cells that no
        file provided are ``0`` in a newly added row and ``0.0`` in a
        newly added column.
        """
        with open(self.myfile, 'r') as filestuff:
            firstline = filestuff.readline().strip()
            otherfiles = [line.strip() for line in filestuff]

        if (len(PyPluMA.prefix()) != 0):
            firstline = PyPluMA.prefix() + "/" + firstline

        # Use first file to get indices
        with open(firstline, 'r') as firstfile:
            self.firstline = firstfile.readline().strip()
            lines = [line.strip() for line in firstfile]

        self.m = len(lines)
        self.samples = []
        self.bacteria = self.firstline.split(',')
        if (self.bacteria.count('\"\"') != 0):
            self.bacteria.remove('\"\"')

        self.n = len(self.bacteria)
        self.ADJ = []
        for line in lines:
            contents = line.split(',')
            self.samples.append(contents[0])
            # Field 0 is the row name; values start at field 1.
            self.ADJ.append(
                [contents[j + 1].strip() for j in range(self.n)])

        for myline in otherfiles:
            if not myline:
                # A blank (e.g. trailing) line names no file.
                continue
            if (len(PyPluMA.prefix()) != 0):
                myline = PyPluMA.prefix() + "/" + myline
            with open(myline, 'r') as newfile:
                bac = newfile.readline().strip().split(',')
                if (bac.count('\"\"') != 0):
                    bac.remove('\"\"')
                lines = [line2.strip() for line2 in newfile]

            for line2 in lines:
                contents = line2.split(',')
                bac2 = contents[0]
                if (bac2 in self.samples):
                    x = self.samples.index(bac2)
                else:
                    # Unknown row: add it, zero-filled across all columns.
                    self.ADJ.append([0] * self.n)
                    x = len(self.ADJ) - 1
                    self.m += 1
                    self.samples.append(bac2)
                for j in range(1, len(contents)):
                    # Field j of the row belongs to header name j - 1.
                    if (bac[j - 1] in self.bacteria):
                        y = self.bacteria.index(bac[j - 1])
                        self.ADJ[x][y] = contents[j].strip()
                    else:
                        # Unknown column: extend every row, then fill in
                        # the cell of the current row.
                        self.bacteria.append(bac[j - 1])
                        self.n += 1
                        for row in self.ADJ:
                            row.append(0.0)
                        self.ADJ[x][-1] = contents[j].strip()
Example #13
0
    def input(self, filename):
        """Load the image sets, then build, compile and fit the CNN.

        ``filename`` is a tab-separated parameter file (``key<TAB>value``)
        that must define ``classnames``, ``trainset``, ``testset``,
        ``tensor``, ``dense``, ``optimize``, ``metric`` and ``epochs``.
        The first five are file names relative to ``PyPluMA.prefix()``:

        * ``classnames`` -- one class name per line
        * ``trainset`` / ``testset`` -- ``<image path>,<class name>`` lines
        * ``tensor`` -- one convolution layer per line:
          ``filters<TAB>kernel rows<TAB>kernel cols<TAB>activation``
        * ``dense`` -- one dense layer per line: ``units<TAB>activation``

        Sets ``self.parameters``, ``self.class_names``,
        ``self.inputfilenames`` (test image paths), ``self.test_images``,
        ``self.test_labels`` and ``self.model``.  The model is fitted here,
        with the test set passed as validation data.
        """
        def load_image_set(listing_name):
            """Return (images, labels, image paths) for one listing file."""
            image_list = []
            label_list = []
            names = []
            with open(PyPluMA.prefix() + "/" + listing_name,
                      'r') as listing:
                for line in listing:
                    contents = line.strip().split(',')
                    data = numpy.asarray(
                        im.open(PyPluMA.prefix() + "/" + contents[0]))
                    names.append(contents[0])
                    print("READING FILE " + contents[0])
                    # The extra list level gives each image a leading axis
                    # of length 1, so vstack stacks along a new first axis.
                    image_list.append([data])
                    label_list.append(
                        numpy.asarray(
                            [[self.class_names.index(contents[1])]]))
            return (numpy.vstack(tuple(image_list)),
                    numpy.vstack(tuple(label_list)), names)

        self.parameters = dict()
        with open(filename, 'r') as paramfile:
            for line in paramfile:
                contents = line.split('\t')
                self.parameters[contents[0]] = contents[1].strip()

        with open(PyPluMA.prefix() + "/" + self.parameters['classnames'],
                  'r') as classnames_file:
            self.class_names = [line.strip() for line in classnames_file]

        train_images, train_labels, _ = load_image_set(
            self.parameters['trainset'])
        (self.test_images, self.test_labels,
         self.inputfilenames) = load_image_set(self.parameters['testset'])

        # Normalize pixel values to be between 0 and 1
        train_images, self.test_images = train_images / 255.0, self.test_images / 255.0

        self.model = models.Sequential()
        with open(PyPluMA.prefix() + "/" + self.parameters['tensor'],
                  'r') as tensorfile:
            for layer_index, line in enumerate(tensorfile):
                contents = line.strip().split('\t')
                if (layer_index == 0):
                    print(len(train_images[0]))
                    print(len(train_images[0][0]))
                    # Only the first layer declares the input shape.
                    self.model.add(
                        layers.Conv2D(int(contents[0]),
                                      (int(contents[1]), int(contents[2])),
                                      activation=contents[3],
                                      input_shape=(len(train_images[0]),
                                                   len(train_images[0][0]),
                                                   3)))  # Assuming RGB (3)
                else:
                    # Every later convolution is preceded by 2x2 pooling.
                    self.model.add(layers.MaxPooling2D((2, 2)))
                    self.model.add(
                        layers.Conv2D(int(contents[0]),
                                      (int(contents[1]), int(contents[2])),
                                      activation=contents[3]))

        self.model.summary()

        self.model.add(layers.Flatten())
        with open(PyPluMA.prefix() + "/" +
                  self.parameters['dense']) as densefile:
            for line in densefile:
                contents = line.strip().split('\t')
                self.model.add(
                    layers.Dense(int(contents[0]), activation=contents[1]))
        # Final layer: one output per class, no activation (the loss below
        # is configured with from_logits=True).
        self.model.add(layers.Dense(len(self.class_names)))

        self.model.summary()

        self.model.compile(optimizer=self.parameters['optimize'],
                           loss=tf.keras.losses.SparseCategoricalCrossentropy(
                               from_logits=True),
                           metrics=[self.parameters['metric']])

        self.model.fit(train_images,
                       train_labels,
                       epochs=int(self.parameters['epochs']),
                       validation_data=(self.test_images,
                                        self.test_labels))
Example #14
0
 def input(self, filename):
     """Read two file names from the first two lines of ``filename``.

     Each line is stripped of surrounding whitespace and prefixed with
     ``PyPluMA.prefix() + "/"``.
     """
     # NOTE(review): tempstuff is not closed in the lines visible here.
     tempstuff = open(filename, 'r')
     # First line -> self.myfile
     self.myfile = PyPluMA.prefix() + "/" + tempstuff.readline().strip()
     # Second line -> self.mycountfile
     self.mycountfile = PyPluMA.prefix() + "/" + tempstuff.readline().strip(
     )