Ejemplo n.º 1
0
    def makeResults(self, parallelResult, oEnvironment, args):
        """Plot, per method, a clustered heat map of mean absolute log2 fold changes.

        Every file in ``args.diffreg`` is parsed into an EnrichmentDF whose
        conditions form one condition pair. For each method in
        ``args.methods`` the absolute values of the ``<method>_log2FC`` column
        are averaged; entries that are ``None`` or the string ``'None'`` are
        ignored. The averages fill a symmetric conditions x conditions matrix
        which is handed to ``PorePlot.heat_map_cluster``.

        ``parallelResult`` and ``oEnvironment`` are not used by this method.
        """

        allDiffRegSims = defaultdict(dict)
        conditions = set()

        for diffRegFile in args.diffreg:

            thisData = EnrichmentDF(DataFrame.parseFromFile(diffRegFile))
            condPair = tuple(thisData.getConditions())
            conditions.update(condPair)

            for method in args.methods:

                methodFCs = [
                    abs(float(x))
                    for x in thisData.getColumn(method + "_log2FC")
                    if x is not None and x != 'None'
                ]

                if not methodFCs:
                    # No usable fold change for this method/pair: skip it
                    # instead of dividing by zero; its matrix cells stay 0.
                    continue

                allDiffRegSims[method][condPair] = sum(methodFCs) / len(methodFCs)

        allConditions = sorted(conditions)
        # Resolve matrix positions once instead of calling list.index per cell.
        condIndex = {cond: idx for idx, cond in enumerate(allConditions)}

        for method, pairSims in allDiffRegSims.items():

            sims = np.zeros((len(allConditions), len(allConditions)))

            for condPair, average in pairSims.items():
                first = condIndex[condPair[0]]
                second = condIndex[condPair[1]]

                # The value is undirected, so mirror it across the diagonal.
                sims[first, second] = average
                sims[second, first] = average

            PorePlot.heat_map_cluster(sims, allConditions, allConditions, "Similarity: " + str(method), "", pltcfg=args.pltcfg)
Ejemplo n.º 2
0
    def __init__(self, args):
        """Initialise the parent class and start with empty analysis state.

        :param args: forwarded unchanged to the parent class initialiser.
        """

        super(FoldChangeAnalysis, self).__init__(args)

        # Fresh container for per-condition data; no counts are loaded yet.
        self.condData = EnrichmentDF()
        self.counts = None
Ejemplo n.º 3
0
    def makeResults(self, parallelResult, oEnvironment, args):
        """Run the differential analysis and/or collect existing results for the HTML output.

        If ``args.counts`` is given, the counts are read and, separately for
        the value sources ``'coverage'`` and ``'read_counts'``, every sample
        is added as a condition to a fresh EnrichmentDF on which
        ``runDEanalysis`` is executed. The created comparisons are passed to
        ``prepareHTMLOut``.

        If ``args.diffreg`` is given, the listed files are parsed, grouped by
        the value source reported by ``getValueSource`` and passed to
        ``prepareHTMLOut`` together with the set of all their conditions.

        ``parallelResult`` and ``oEnvironment`` are not used by this method.
        """

        if args.counts is not None:
            # counts is a defaultdict(list) for each condition name with
            # maybe multiple samples
            counts = self.readCounts(args)

            vConds = sorted(counts)

            createdComparisons = defaultdict(list)

            for valueSource in ['coverage', 'read_counts']:
                self.condData = EnrichmentDF()

                replicates = {}

                for condition in vConds:

                    condReplicates = []
                    for condDataSample in counts[condition]:

                        geneNames = condDataSample.getColumnIndex('gene')
                        geneCounts = condDataSample.getColumnIndex(valueSource)

                        condRow = condDataSample.toDataRow(
                            geneNames, geneCounts)

                        # The sample's file path serves as its condition name.
                        sampleName = condDataSample.filepath
                        condReplicates.append(sampleName)

                        self.condData.addCondition(condRow, sampleName)

                    replicates[condition] = condReplicates

                print("Running for conditions: " + str(vConds))

                createdComparisons[valueSource] += self.condData.runDEanalysis(
                    args.output,
                    prefix=valueSource,
                    rscriptPath=args.rscript.name,
                    methods=args.methods,
                    replicates=replicates,
                    noDErun=args.noanalysis)

            # replicates holds the mapping built for the last value source.
            self.prepareHTMLOut(createdComparisons, replicates, args)

        if args.diffreg is not None:

            createdComparisons = defaultdict(list)
            conditions = set()

            for diffRegFile in args.diffreg:

                df = EnrichmentDF.parseFromFile(diffRegFile)
                valueSource = self.getValueSource(df)

                # Sets do not support "+="; update() merges the conditions.
                conditions.update(df.getConditions())

                createdComparisons[valueSource].append(diffRegFile)

            # NOTE(review): the counts branch passes a condition -> samples
            # dict here, this branch a set of conditions - confirm that
            # prepareHTMLOut accepts both.
            self.prepareHTMLOut(createdComparisons, conditions, args)
Ejemplo n.º 4
0
    def makeResults(self, parallelResult, oEnvironment, args):
        """Tally the genes with the smallest p-values and export the tally as HTML.

        For every file in ``args.diffreg`` and every method in
        ``args.methods`` the genes are ordered by ascending
        ``<method>_RAW.PVA`` value and each of the first ``args.top`` genes
        receives one hit. Genes without a p-value are ignored. The table of
        gene id, hit count and UniProt search link is exported to
        ``args.output``, most frequent genes first.

        ``parallelResult`` and ``oEnvironment`` are not used by this method.
        """

        def parseNones(row):
            # The literal string 'None' marks a missing value in the files.
            return [row[i] if row[i] != 'None' else None
                    for i in range(len(row))]

        topGenes = Counter()

        # Clamp at zero: a negative slice bound would count from the end,
        # whereas a non-positive limit has to select nothing.
        topCount = max(args.top, 0)

        for diffRegFile in args.diffreg:

            thisData = EnrichmentDF(DataFrame.parseFromFile(diffRegFile))
            thisData.applyToRow(parseNones)

            for method in args.methods:

                pvals = thisData.toDataRow(
                    thisData.getColumnIndex('id'),
                    thisData.getColumnIndex(method + "_RAW.PVA"))
                genepval = [(x[0], float(x[1])) for x in pvals.to_pairs()
                            if x[1] is not None]

                genepval.sort(key=lambda x: x[1])

                # Slicing tolerates files with fewer than args.top genes,
                # where indexing up to args.top raised IndexError.
                topGenes.update(geneID for geneID, _ in genepval[:topCount])

        outputDF = DataFrame()

        outputDF.addColumn('gene_id')
        outputDF.addColumn('count')
        outputDF.addColumn('link')

        for (gene, count) in topGenes.most_common():

            geneRow = DataRow.fromDict({
                'gene_id':
                gene,
                'count':
                count,
                'link':
                "<a href='http://www.uniprot.org/uniprot/?query=" + gene +
                "&sort=score' target='_blank'>UniProt</a>",
            })

            outputDF.addRow(geneRow)

        outputDF.export(args.output, ExportTYPE.HTML)
Ejemplo n.º 5
0
    def makeResults(self, parallelResult, oEnvironment, args):
        """Run the count-based differential analysis and prepare the HTML output.

        Does nothing unless ``args.counts`` is given. Otherwise the gene
        enhancement and gene length data are loaded, the counts are read and
        every sample is added as a condition to a fresh EnrichmentDF. Each row
        carries the gene id and the sample's ``count`` value, plus the ``LS``,
        ``FPKM`` and ``TPM`` values when ``args.libsize``, ``args.fpkm`` and
        ``args.tpm`` are set. ``runDEanalysis`` is then executed and the
        created comparisons are passed to ``prepareHTMLOut``.

        ``parallelResult`` and ``oEnvironment`` are not used by this method.
        """

        if args.counts is not None:

            geneEnhancement = self.loadEnhancement(args.enhanced)
            geneLengths = self.loadGeneLengths(args.lengths)

            # counts is a defaultdict(list) for each condition name with
            # maybe multiple samples; the second return value is not needed.
            counts, _ = self.readCounts(args,
                                        biotypes=geneEnhancement,
                                        gene2length=geneLengths)

            # Deliberately unsorted: keep the order in which counts yields
            # the conditions.
            vConds = list(counts)

            createdComparisons = defaultdict(list)

            # The optional per-sample columns depend only on args, so decide
            # once which ones to copy instead of re-checking for every row.
            extraColumns = [column for wanted, column in (
                (args.libsize, "LS"),
                (args.fpkm, "FPKM"),
                (args.tpm, "TPM"),
            ) if wanted]

            for valueSource in ['count']:
                self.condData = EnrichmentDF()
                replicates = OrderedDict()

                for condition in vConds:

                    condReplicates = []
                    for condDataSample in counts[condition]:

                        # The sample's file path serves as its condition name.
                        sampleName = condDataSample.filepath

                        print(sampleName, len(condDataSample))

                        rowUpdates = []
                        for row in condDataSample:

                            rowData = {
                                "id": row["gene"],
                                sampleName: row[valueSource]
                            }

                            for column in extraColumns:
                                rowData[sampleName + "." + column] = row[column]

                            rowUpdates.append(rowData)

                        condReplicates.append(sampleName)

                        # An empty sample has no first row to show.
                        print("Add Condition", sampleName,
                              rowUpdates[0] if rowUpdates else None)
                        self.condData.addConditions(rowUpdates, sampleName)

                    replicates[condition] = condReplicates

                print("Running for conditions: " + str(vConds))

                createdComparisons[valueSource] += self.condData.runDEanalysis(
                    args.output,
                    prefix=valueSource,
                    rscriptPath=args.rscript.name,
                    methods=args.methods,
                    replicates=replicates,
                    noDErun=args.noanalysis,
                    enhanceSymbol=geneEnhancement,
                    geneLengths=geneLengths)

            self.prepareHTMLOut(createdComparisons, replicates, args)
Ejemplo n.º 6
0
    def __init__(self, args):
        """Initialise the parent class and start with empty analysis state.

        :param args: forwarded unchanged to the parent class initialiser.
        """

        super(FoldChangeSimilarity, self).__init__(args)

        # Fresh container for per-condition data; no counts are loaded yet.
        self.condData = EnrichmentDF()
        self.counts = None