def GetDistribListForDirectory(directory, filesToFind):
    """Collect one dinucleotide distribution per matching file.

    Each path returned by ``SeqGenUtils.findFiles(directory, filesToFind)``
    is loaded with ``shuffle_utils.get_seqs`` and its sequences are passed to
    ``shuffle_utils.compute_dinuc_distrib``.

    Args:
        directory: Forwarded unchanged to ``SeqGenUtils.findFiles``.
        filesToFind: Forwarded unchanged to ``SeqGenUtils.findFiles``
            (presumably a file-name pattern -- confirm against that helper).

    Returns:
        A list holding the result of ``compute_dinuc_distrib`` for every
        file, in the order the files were yielded. Empty when nothing
        matched.
    """
    distributions = []
    for path in SeqGenUtils.findFiles(directory, filesToFind):
        # get_seqs hands back three values; only the sequences are needed
        # here, the other two are deliberately ignored.
        sequences, _gc_values, _lengths = shuffle_utils.get_seqs(path)
        # NOTE(review): the meaning of the literal True flag is defined by
        # shuffle_utils.compute_dinuc_distrib and is not visible here.
        distributions.append(
            shuffle_utils.compute_dinuc_distrib(sequences, True)
        )
    return distributions
def parseSubDirectories(resultDir, level=1):
    """Write per-experiment GC-content mean and variance for ``resultDir``.

    Every file matching ``Signal*.fa`` that ``SeqGenUtils.findFiles`` reports
    under ``resultDir`` is passed to ``gcContent.getNucleotideComposition``.
    The GC value is grouped under an experiment name taken from the third
    "/"-separated component of the file's directory path. For each
    experiment ``[np.mean(values), np.var(values)]`` is computed and the
    resulting dict is handed to ``writeDictToFile``.

    Progress is echoed to stdout: the output file name, each signal file,
    and the GC/AT values obtained for it.

    Args:
        resultDir: Directory to search. The output file name is built as
            ``resultDir + resultDir[:-1] + "_GC_Content.out"``.
            NOTE(review): this looks like it expects a single-component
            path ending in a separator (e.g. ``"results/"`` ->
            ``"results/results_GC_Content.out"``) -- confirm with callers.
        level: Currently unused; kept so existing callers keep working.
            NOTE(review): possibly intended to select the path component
            used as the experiment name, which is hard-coded to index 2.

    Returns:
        None. The summary is written through ``writeDictToFile``.

    Raises:
        IndexError: If a signal file's directory has fewer than three
            "/"-separated components.
    """
    resultFileName = resultDir + resultDir[:-1] + "_GC_Content.out"
    # Each print receives a single pre-formatted string so the statement
    # emits the same text under Python 2 and Python 3; "%s %s" reproduces
    # the space the multi-item print statement inserted between items.
    print("%s %s" % ("Result FileName: ", resultFileName))

    gcValuesByExperiment = {}
    for signalFile in SeqGenUtils.findFiles(resultDir, "Signal*.fa"):
        print("%s %s" % ("Signal File: ", signalFile))
        # Experiment name = third component of the containing directory.
        expt_name = str(os.path.dirname(signalFile).split("/")[2])
        gcContentValue, atContentValue = (
            gcContent.getNucleotideComposition(signalFile)
        )
        print("%s %s" % (gcContentValue, atContentValue))
        # setdefault groups in one dict lookup instead of scanning a
        # freshly built key list on every iteration.
        gcValuesByExperiment.setdefault(expt_name, []).append(gcContentValue)

    # Second element is the variance (np.var), not the standard deviation.
    gcContentMeanVar = {}
    for experiment, values in gcValuesByExperiment.items():
        gcContentMeanVar[experiment] = [np.mean(values), np.var(values)]

    writeDictToFile(gcContentMeanVar, resultFileName)