Code Example #1
def example():
	if request.method == 'GET':

		_file_a = request.args.get("file_a")
		_file_b = request.args.get("file_b")
		_file_c = request.args.get("file_c")
		
		_v = ViewRenderer("ml")
		_v.render()
		if _file_a is not None:

			#if you have other parameters...
			#m = Ml("data/n2/n2_sample/1425405680330/","data/n2/n2_1/1425406094608/","data/n2/n2_2/1425407232389/")
			m = Ml(_file_a,_file_b,_file_c)
			_data = m.classify()
			h = Helper()
			_data = h.listToGrid(_data)
			_v = _v.inject("%%%checkboxes%%%",str(_data))
			return _v
		else:
			_v = _v.inject("%%%checkboxes%%%","")
			return _v

	else:
		return "Method nor supported"
Code Example #2
File: VariantSet.py  Project: djhn75/RNAEditor
    def deleteOverlapsFromVcf(self,variants):
        '''
        delete the variants from 'variantsA' which also are in 'variantsB'
        '''

        variantSetA = set(self.variantDict.keys())
        
        #determine the type of the variants argument
        if type(variants) == str:
            variantsB = open(variants)
        elif type(variants) == file:
            variantsB = variants
        else:
            raise TypeError("variants has wrong type, need str or file, %s found" % type(variants))
        #TODO: variants could also be another object of VariantsSet
        
        #get Start time
        startTime = Helper.getTime()
        Helper.info(" [%s] Delete overlapps from %s" % (startTime.strftime("%c"),variantsB.name),self.logFile,self.textField)

        for line in variantsB:
            if line.startswith("#"):
                continue
            for varTuple in self.getVariantTuble(line):
                if varTuple in variantSetA:
                #A.discard(varTuple)
                    variantSetA.remove(varTuple)
                    del self.variantDict[varTuple]
        
        #calculate duration 
        Helper.printTimeDiff(startTime,self.logFile,self.textField)
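
A minimal usage sketch for the method above (hypothetical VCF path; assumes variantSetA is an already populated VariantSet instance):

    # drop every variant from variantSetA that also occurs in the second VCF file
    variantSetA.deleteOverlapsFromVcf("sample_B.vcf")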
Code Example #3
 def overallAnalytics(self):
     print('Total number of entries: ', end=' ')
     print(len(self.wordCountOfEntriesDict))
     print('First entry: ', end=' ')
     print(Helper.prettyPrintDate(self.firstDate))
     print('Last entry: ', end=' ')
     print(Helper.prettyPrintDate(self.mostRecentDate))
     print('Total days from first to last entry: ', end=' ')
     totalDays = self.mostRecentDate - self.firstDate #this is correct
     days = totalDays.days
     print(days)
     print('Percentage of days from first to last with an entry: ', end=' ')
     print(str(round(float(len(self.wordCountOfEntriesDict)) / days * 100, 2)) + '%')
     print('Average length per entry: ', end=' ')
     numberOfEntries = len(self.wordCountOfEntriesDict)
     sumOfLengths = 0
     longestEntryLength = 0
     for date in list(self.wordCountOfEntriesDict.keys()):
         length = self.wordCountOfEntriesDict[date]
         if length > longestEntryLength:
             longestEntryLength = length
             longestEntryDate = date
         sumOfLengths += length 
     print(round(float(sumOfLengths) / numberOfEntries, 2))
     print('Longest entry: ' + str(longestEntryLength) + ' words on ', end=' ')
     print(Helper.prettyPrintDate(longestEntryDate))
     print('Total number of words written: ', end=' ')
     print(locale.format("%d", self.totalNumberOfWords, grouping=True))
Code Example #4
    def _getSubsetForGP(self, S, random=True, normalize=True):
        Nsubset = min(self.numSamplesSubsetGP, S.shape[0])

        if random:
            return Helper.getRandomSubset(S, Nsubset)
        else:
            return Helper.getRepresentativeRows(S, Nsubset, normalize)
Code Example #5
File: createDiagrams.py  Project: djhn75/RNAEditor
def parseSummaryFile(sumFile,logFile=None,textField=0):
    '''
    Parses a .summary file from an rnaEditor output directory and returns it as an ordered dict
    Note: unedited Genes will be skipped
    :param sumFile: .summary file of rnaEditor
    :param logFile:
    :param textField:
    :return: OrderedDict {GeneName1:[GeneId1,3'UTR,5'UTR,EXON,Intron,Total]}
    '''
    
    
    if type(sumFile)==str:
        try:
            sumFile=open(sumFile,"r")
        except IOError:
            Helper.warning("Could not open %s to write Variant" % sumFile ,logFile,textField)
    elif type(sumFile)==file:
        pass
    else:
        raise TypeError("Summary file hat to be path or file object", logFile, textField)
    
    geneDict=OrderedDict()
    totalGenes=0
    for line in sumFile:
        if line.startswith("#"): continue #skip comments
        line = line.rstrip().split()
        totalGenes+=1
        if int(line[6])<1: continue #skip unedited genes
        try:
            v=map(int,line[2:7])
        except ValueError:
            v=line[2:7]
        geneDict[line[0]]=[line[1]]+v

    return geneDict,totalGenes
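
A short usage sketch for parseSummaryFile (hypothetical path; the function also accepts an already opened file object, as shown above):

    # geneDict maps GeneName -> [GeneId, 3'UTR, 5'UTR, Exon, Intron, Total]; totalGenes counts all parsed genes
    geneDict, totalGenes = parseSummaryFile("sample/sample.summary")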
Code Example #6
File: MapFastq.py  Project: djhn75/RNAEditor
    def __init__(self,rnaEdit):
        '''
        Constructor
        '''
        
        self.rnaEdit=rnaEdit

        """
        #check read Quality encoding and convert to phred33 quality if necessary
        for i in range(len(self.rnaEdit.fastqFiles)):
            if Helper.isPhred33Encoding(self.rnaEdit.fastqFiles[i], 1000000, self.rnaEdit.logFile, self.rnaEdit.textField) == False:
                self.rnaEdit.fastqFiles[i]=Helper.convertPhred64toPhred33(self.rnaEdit.fastqFiles[i],self.rnaEdit.params.output+ "_" + str(i+1) + "_phred33.fastq",self.rnaEdit.logFile,self.rnaEdit.textField)
        """
        
        
        #set fastQ files and check if the qualities have to be converted
        if self.rnaEdit.params.paired==True:
            if Helper.isPhred33Encoding(self.rnaEdit.fastqFiles[0], 1000000, self.rnaEdit.logFile, self.rnaEdit.textField) == False or Helper.isPhred33Encoding(self.rnaEdit.fastqFiles[1], 1000000, self.rnaEdit.logFile, self.rnaEdit.textField) == False:
                self.fastqFile1 = Helper.convertPhred64toPhred33(self.rnaEdit.fastqFiles[0],self.rnaEdit.params.output+ "_1_phred33.fastq",self.rnaEdit.logFile,self.rnaEdit.textField)
                self.fastqFile2 = Helper.convertPhred64toPhred33(self.rnaEdit.fastqFiles[1],self.rnaEdit.params.output+ "_2_phred33.fastq",self.rnaEdit.logFile,self.rnaEdit.textField)
            else:
                self.fastqFile1=self.rnaEdit.fastqFiles[0]
                self.fastqFile2=self.rnaEdit.fastqFiles[1]
        elif self.rnaEdit.params.paired==False:
            if Helper.isPhred33Encoding(self.rnaEdit.fastqFiles[0], 1000000, self.rnaEdit.logFile, self.rnaEdit.textField) == False:
                self.fastqFile1 = Helper.convertPhred64toPhred33(self.rnaEdit.fastqFiles[0], self.rnaEdit.params.output + "_1_phred33.fastq", self.rnaEdit.logFile, self.rnaEdit.textField)
            else:
                self.fastqFile1 = self.rnaEdit.fastqFiles[0]
Code Example #7
File: tunein.py  Project: dernico/mediaschlingel
def parse_pls(url):
    urls = []
    pls_content = Helper.downloadString(url)
    
    stream = Helper.parsePls(pls_content)
    if stream:
        urls.append(stream)
    return urls
Code Example #8
File: GuiControll.py  Project: djhn75/RNAEditor
    def newAssay(self):
        '''
        Function which starts a new analysis
        '''
        
        inputTab = self.view.tabMainWindow.widget(0)
        
        #get Parameters 
        parameters=Parameters(inputTab)
        if parameters.paired:
            #fastqs=inputTab.dropList.dropFirstTwoItems()
            fastqs = inputTab.dropList.dropFirstItem()
            if fastqs[0] is not None:
                if not str(fastqs[0].text()).endswith(".bam"):
                    fastqs+=inputTab.dropList.dropFirstItem()
        else:
            fastqs = inputTab.dropList.dropFirstItem()
        
        """
        check if droplist returned a value
        """
        if parameters.paired:
            if fastqs[-1] is None:
                QtGui.QMessageBox.information(self.view,"Warning","Warning:\nNot enough Sequencing Files for paired-end sequencing!!!\n\nDrop FASTQ-Files to the drop area!")
                return
        if fastqs[0] is None:
            QtGui.QMessageBox.information(self.view,"Warning","Warning:\nNo Sequencing Files found!!!\n\nDrop FASTQ-Files to the drop area!")
            return
        sampleName = Helper.getSampleName(str(fastqs[0].text()))
        if sampleName is None:
            QtGui.QMessageBox.information(self.view,"Warning","Warning:\nNo valid Sequencing File!!!\n\nDrop FASTQ-Files to the drop area!")
            return
        
        fastqFiles=[]
        for fastq in fastqs:
            fastqFiles.append(str(fastq.text()))

        runTab = RunTab(self)

        #initialize new Thread with new assay
        try:
            assay = RnaEdit(fastqFiles, parameters,runTab.commandBox)
        except Exception as err:
            QtGui.QMessageBox.information(self.view,"Error", str(err)+"Cannot start Analysis!")
            Helper.error(str(err) + "\n creating rnaEditor Object Failed!", textField=runTab.commandBox)
        currentIndex = self.view.tabMainWindow.count()

        # self.view.tabMainWindow.addTab(self.runTab, "Analysis"+ str(Helper.assayCount))
        self.view.tabMainWindow.addTab(runTab, sampleName + " " + str(currentIndex))
        Helper.runningThreads.append(assay)
        
        assay.start()
        
        self.view.connect(assay, QtCore.SIGNAL("taskDone"), self.openAnalysis)
Code Example #9
File: tunein.py  Project: dernico/mediaschlingel
def parse_m3u(url):

    urls = []
    m3u_content = Helper.downloadString(url)
    
    stream = Helper.parsem3u(m3u_content)
    if stream:
        urls.append(stream)
    return urls
Code Example #10
File: VariantSet.py  Project: djhn75/RNAEditor
 def deleteNonEditingBases(self):
     startTime=Helper.getTime()
     Helper.info("Delete non Editing Bases (keep only T->C and A->G)",self.logFile,self.textField)
     
     for varTuple in list(self.variantDict.keys()): #copy the keys so entries can be deleted while iterating
         chr,pos,ref,alt = varTuple
         if (ref =="A" and alt == "G") or (ref=="T" and alt=="C"):
             pass
         else:
             del self.variantDict[varTuple]
Code Example #11
    def _updateBandwidthsGP(self, Ssub):
        bwNonKb = Helper.getBandwidth(Ssub[:, 0:self.NUM_NON_KB_DIM],
                Ssub.shape[0], self.bwFactorNonKbGP)

        kbPos = Ssub[:, self.NUM_NON_KB_DIM:]
        bwKb = Helper.getBandwidth(self._reshapeKbPositions(kbPos),
                Ssub.shape[0], self.bwFactorKbGP)

        self.policy.kernel.setBandwidth(bwNonKb, bwKb)
        self.policy.kernel.setWeighting(self.weightNonKbGP)
Code Example #12
    def readFile(self, url):
        try:
            f = open(url, 'r')
        except:
            print('File not found')
            newPath = input('Enter new path > ')
            return self.readFile(newPath) #TODO: this doesn't work for entirely unknown reasons

        newdate = re.compile(r'\s*([0-9]{1,2}-[0-9]{1,2}-[0-9]{2})\s*')
        currentDateStr = None
        currentDateObj = None
        numWords = 0
        namesFound = set()
        totalWordNum = 0

        currentDayEntry = '' #holds all the lines for the current day, so we can compute a hash of the day later on
        
        line = f.readline()
        while line != '':
            if self.prefs.GUESS_NAMES:
                self.guessNames(line)
            #check a line to see if it's a date, therefore a new day
            dateFound = newdate.match(line)
            if dateFound is not None: #it's a new date, so wrap up the previous date and set up to move onto the next one
                if namesFound is not None:
                    self.addRelatedNames(namesFound)
                    namesFound = set()
                    self.dayEntryHashTable[currentDateObj] = hashlib.md5(currentDayEntry.encode()) #TODO: deal with first date

                if numWords > 0:
                    self.wordCountOfEntriesDict[currentDateObj] = numWords #should be here, since we want it triggered at the end
                totalWordNum += numWords
                numWords = 0
                currentDateStr = dateFound.group(0)
                currentDateStr = Helper.formatDateStringIntoCleanedString(currentDateStr)
                currentDateObj = Helper.makeDateObject(currentDateStr)

                if currentDateObj > self.mostRecentDate: #found a higher date than what we've seen so far
                    self.mostRecentDate = currentDateObj
                if currentDateObj < self.firstDate: #found a lower date than what we have now
                    self.firstDate = currentDateObj
                line = line[len(currentDateStr):] #remove date from line, so it's not a word

            if currentDateStr != None:
                (wordsFound, namesFoundThisLine) = self.addLine(line, currentDateObj)
                for name in namesFoundThisLine:
                    namesFound.add(name)
                numWords += wordsFound
            line = f.readline()
            currentDayEntry += line #add line to the day's entry

        #need to capture the last date for the entry length
        self.wordCountOfEntriesDict[currentDateObj] = numWords 
        self.totalNumberOfWords = totalWordNum + numWords #need to get words from last line
        f.close()
Code Example #13
File: RNAEditor.py  Project: djhn75/RNAEditor
 def stopImmediately(self):
     if hasattr(self, 'callEditSites'):
         self.callEditSites.cleanUp()
     self.isTerminated=True
     
     if self.runningCommand != False:
         self.runningCommand.kill()
     else:
         self.terminate()
         self.wait()
     Helper.error("Analysis was terminated by User", self.logFile, self.textField)
Code Example #14
File: RNAEditor.py  Project: djhn75/RNAEditor
 def run(self):
     try:
         self.startAnalysis()
     except Exception:
         Helper.error("RnaEditor Failed",self.logFile,self.textField)
     
     """ At this point the RnaEditor has succesfully finished """
     fileDir = os.path.dirname(os.path.realpath(__file__))
     cmd=["python",fileDir+"/createDiagrams.py","-o", self.params.output]
     a=subprocess.call(cmd)
     self.emit(QtCore.SIGNAL("taskDone"), self.params.output+".html")
Code Example #15
File: VariantSet.py  Project: djhn75/RNAEditor
    def topGenes(self,sumDict, fileName,number=20,value=4):
        if number > len(sumDict):
            if len(sumDict)<1:
                Helper.warning("no edited genes found", self.logFile, self.textField)
                return
            Helper.warning("The number of top genes you wanted is bigger than the number of edited genes", self.logFile, self.textField)
            number=len(sumDict)
        if value > 4:
            Helper.error("sumDict only hold four values", self.logFile, self.textField)
        
        
        counts=collections.OrderedDict(sorted(sumDict.items(), key=lambda t: t[1][value],reverse=True)[:number])
        barNameTuple=()
        valueMatrix=[[]]
        for array in counts.values():
            valueMatrix[0].append(array[value])
        for gene in counts.keys():
            barNameTuple+=(gene.names[0],)

        if value==0:
            barName="3'-UTR"
        elif value==1:
            barName="5'-UTR"
        elif value==2:
            barName="Exonic"
        elif value==3:
            barName="Intronic"
        elif value==4:
            barName="Total"
        
        yLim=max(max(i) for i in valueMatrix)+1
        Helper.createBarplot(valueMatrix, fileName, barNameTuple, [barName], width=0.35, title="Highly Edited Genes",yLim=yLim,barText=False,yText="Editing Counts")
Code Example #16
File: RNAEditor.py  Project: djhn75/RNAEditor
    def startAnalysis(self):
        """
        START MAPPING
        """
        if self.fastqFiles[0].endswith("bam"):
            if self.fastqFiles[0].endswith("noDup.realigned.recalibrated.bam"):
                Helper.info("Bam File given. Skip mapping", self.logFile, self.textField)
                self.mapFastQ=None
                mapResultFile=self.fastqFiles[0]
            else: 
                Helper.error("Bam File was not mapped with RnaEditor, this is not supported. Please provide the fastq Files to RnaEditor", self.logFile, self.textField, "red")
        else:
            self.mapFastQ=MapFastq(self)
            mapResultFile=self.mapFastQ.startAnalysis()

        """
        START CALLING EDITING SITES
        """
        self.callEditSites=CallEditingSites(mapResultFile,self)
        result = self.callEditSites.startAnalysis()

        #finished
        self.isTerminated=True

        Helper.status("rnaEditor Finished with %s" % self.params.output, self.logFile, self.textField,"green",True)
        Helper.status("Open %s to see the results" % self.params.output+".html", self.logFile, self.textField,"green",True)
        self.cleanUp()
Code Example #17
    def _updateKernelParameters(self, S, A, random=True, normalize=True):
        SA = self._getStateActionMatrix(S, A)

        if random:
            self.MuS = Helper.getRandomSubset(S, self.numFeatures)
            self.MuSA = Helper.getRandomSubset(SA, self.numFeatures)
        else:
            self.MuS = Helper.getRepresentativeRows(S, self.numFeatures, normalize)
            self.MuSA = Helper.getRepresentativeRows(SA, self.numFeatures, normalize)

        NUM_SAMPLES_FOR_BW_ESTIMATE = 500

        # bandwidth for PHI_S
        bwNonKbS = Helper.getBandwidth(self.MuS[:, 0:self.NUM_NON_KB_DIM],
                NUM_SAMPLES_FOR_BW_ESTIMATE, self.bwFactorNonKbS)

        kbPosS = self._reshapeKbPositions(self.MuS[:, self.NUM_NON_KB_DIM:])
        bwKbS = Helper.getBandwidth(kbPosS, NUM_SAMPLES_FOR_BW_ESTIMATE,
                self.bwFactorKbS)

        self.kernelS.setBandwidth(bwNonKbS, bwKbS)
        self.kernelS.setWeighting(self.weightNonKbS)

        # bandwidth for PHI_SA
        bwNonKbSA = Helper.getBandwidth(self.MuSA[:, 0:(self.NUM_NON_KB_DIM + 2)],
                NUM_SAMPLES_FOR_BW_ESTIMATE, self.bwFactorNonKbSA)

        kbPosSA = self._reshapeKbPositions(self.MuSA[:, (self.NUM_NON_KB_DIM + 2):])
        bwKbSA = Helper.getBandwidth(kbPosSA, NUM_SAMPLES_FOR_BW_ESTIMATE,
                self.bwFactorKbSA)

        self.kernelSA.setBandwidth(bwNonKbSA, bwKbSA)
        self.kernelSA.setWeighting(self.weightNonKbSA)
Code Example #18
File: RNAEditor.py  Project: djhn75/RNAEditor
    def __init__(self, fastqFiles, params, textField):
        QtCore.QThread.__init__(self)
        if isinstance(params, Parameters):
            self.params = params
        else:
            Helper.error("Params has to be Instance of Parameters")
        if isinstance(textField, QtGui.QTextEdit) or textField==0:
            self.textField=textField
        else:
            Helper.error("textField has to be Instance of QtGui.QTextEdit or 0")
        

        self.fastqFiles=fastqFiles
        
        #hold the running Popen object
        self.runningCommand=False
        self.isTerminated = False
        #check if the input Files are there
        
        #hold basic statistic values of the run
        basicStatDict={}
            
        
        #set directory where the outputFiles should be written to
        if self.params.output=="default":
            if self.fastqFiles[0].endswith("noDup.realigned.recalibrated.bam"):
                self.sampleName=fastqFiles[0][fastqFiles[0].rfind("/")+1:fastqFiles[0].rfind(".noDup.realigned.recalibrated.bam")]
                self.outdir=fastqFiles[0][0:fastqFiles[0].rfind("/")+1]
            else:
                self.sampleName=fastqFiles[0][fastqFiles[0].rfind("/")+1:fastqFiles[0].rfind(".")]
                # outdir = /path/to/output/rnaEditor/samplename/
                self.outdir=fastqFiles[0][0:fastqFiles[0].rfind("/")+1]+"rnaEditor/"+self.sampleName+"/"
            
            #output=/path/to/output/rnaEditor/samplename/samplename
            self.params.output=self.outdir+self.sampleName
            if not os.path.exists(self.outdir):
                os.makedirs(self.outdir, mode=0o755)
                os.chmod(self.outdir, 0o755)

            #create folder for html output
            if not os.path.exists(self.outdir+"/html"):
                os.makedirs(self.outdir+"/html", mode=0o755)
                os.chmod(self.outdir+"/html", 0o755)
        
        
        self.checkDependencies()
        
        #check if the input Files are there
        self.printParameters()
Code Example #19
File: VariantSet.py  Project: djhn75/RNAEditor
 def annotateVariantDict(self,genome):
     '''
     adds the corresponding Gene and the exact segment where the SNP appears
     :param genome: Genome
     '''
     startTime = Helper.getTime()
     Helper.info(" [%s] Annotating Variants" % (startTime.strftime("%c")),self.logFile,self.textField)
     for v in self.variantDict.values():
         anno = genome.annotatePosition(v.chromosome,v.position) #[(gene1,segment1;segment2;..)..]
         GI=[]
         for a in anno:
             GI.append(a)
         v.attributes["GI"]=GI
     
     Helper.printTimeDiff(startTime,self.logFile,self.textField)
Code Example #20
File: Comments.py  Project: hrishikeshdok/head2head
    def post(self):
        itemName = self.request.get("itemName")
        categoryName = self.request.get("categoryName")
        userEmail = self.request.get("userEmail")
        comment = self.request.get("comment")
        owner = self.request.get("owner")
        
        comment = str(comment).strip()
        message = ""
        if comment:
            ifAlreadyExists = []
            
            #self.response.out.write("Key is "+ str(Helper.getItemKey(userEmail, categoryName, itemName)))
            ifAlreadyExists = ItemComment.gql("WHERE ANCESTOR IS :1",Helper.getItemKey(userEmail, categoryName, itemName))
            
            #self.response.out.write("Key is "+ str(Helper.getItemKey(userEmail, categoryName, itemName)))
            #self.response.out.write("Count is "+ str(ifAlreadyExists.count()))
    
            
                    
            if (ifAlreadyExists.count() == 0):
                #self.response.out.write("Adding comment for " + itemName + " " + categoryName + " " + userEmail)
                itemComment = ItemComment(parent=Helper.getItemKey(userEmail, categoryName, itemName))
                itemComment.comment = comment
                itemComment.commenter = userEmail
                itemComment.item = itemName
                itemComment.category = categoryName
                itemComment.put()
                message="Comment Saved"
            else:
                message="You can comment only once on an item"

        else:
            #self.response.out.write("empty comment")
            message = "Cannot enter empty comment"
            
            
        items = Item.gql("WHERE ANCESTOR IS :1",Helper.getCategoryKey(owner, categoryName))
        template_values = {
                           'items' : items,
                           'owner': owner,
                           'user' : users.get_current_user(),
                           'category' : categoryName,
                           'logoutURL' : users.create_logout_url('./'),
                           'message' : message
                  }
        path = os.path.join(os.path.dirname(__file__), './html/items.html')
        self.response.out.write(template.render(path, template_values))
Code Example #21
File: RNAEditor.py  Project: djhn75/RNAEditor
    def cleanUp(self):
        #print "deleteAssay " + str(self)
        if self.runningCommand != False:
            self.runningCommand.kill()
 
        try:
            if self.mapFastQ!=None:
                self.mapFastQ.cleanUp()
            del self.mapFastQ
        except AttributeError:
            Helper.error("could not delete MapFastQ instance", self.logFile, self.textField)
        try:
            self.callEditSites.cleanUp()
            del self.callEditSites
        except AttributeError:
            Helper.error("could not delete RnaEdit instance", self.logFile, self.textField)
Code Example #22
    def getMarkUnderWord(self, displayName, last20Words, wasPluralWithApostrophe):
        assert type(displayName) is str
        originalWord = displayName #needed when the name isn't actually a name
        displayName = Helper.cleanWord(displayName, True)

        print('\n\n\n')
        for x in last20Words:
            print(x + ' ', end='')


        print('\n' + displayName + ':')
        numPossibleLastNames = 0

        if displayName in self.uniqueDisplayNamesToNameDict.keys(): #we've specified to give the same markup to all these display names
            firstName = self.uniqueDisplayNamesToNameDict[displayName][0]
            lastName = self.uniqueDisplayNamesToNameDict[displayName][1]
        else: #proceed normally
            firstName = ''
            print('Is this the proper first name for ' + displayName + '? [enter] for yes, [n] for no')
            isProperFirstName = input('>')
            if isProperFirstName == 'n':
                print('Enter proper first name (or enter "None" if this is not a name)')
                possibleFirstName = input('>')
                if possibleFirstName == 'None' or possibleFirstName == 'none': #not actually a name
                    return WordClass.addWordOrMarkup(originalWord)
                firstName = possibleFirstName
            else:
                firstName = displayName

            try:
                self.lastNamesForFirstNameDict[firstName] #trigger exception if there's one to be thrown
                for nameFromDict in self.lastNamesForFirstNameDict[firstName]:
                    print(str(numPossibleLastNames) + ': ' + nameFromDict)
                    numPossibleLastNames = numPossibleLastNames + 1
                print('Or type new last name (append "!" at end to auto assign all instances of this name to this last name):')
            except:
                print('Type last name (append "!" at end to auto assign all instances of this name to this last name):')

            #get the last name either from the number of the choice (if it's a number) or the last name that was directly entered
            lastName = ''
            choice = input('>')
            lastName = choice
            for x in range(0, numPossibleLastNames):
                if choice == str(x):
                    lastName = self.lastNamesForFirstNameDict[firstName][x]
                    break

            if lastName[-1] == '!': #specify that all instance of this display name are assigned to this last name, without asking again
                lastName = lastName[:-1]
                self.uniqueDisplayNamesToNameDict[displayName] = (firstName, lastName)

        try:
            if lastName not in self.lastNamesForFirstNameDict[firstName]:
                self.lastNamesForFirstNameDict[firstName].append(lastName)
        except:
            self.lastNamesForFirstNameDict[firstName] = [lastName]

        return WordClass.addNameWithMarkupPieces(displayName, firstName, lastName, wasPluralWithApostrophe)
Code Example #23
File: radio.py  Project: dernico/mediaschlingel
def _call(path, param=None):
        #print('call radio with path=%s, param=%s', path, param)
        url = '{0}/{1}'.format(RadioUrl, path)
        if param:
            url += '?' + urlencode(param)
        print("call radio with url: " + url)
        response = Helper.downloadString(url)
        json_data = json.loads(response)
        return json_data
Code Example #24
File: tunein.py  Project: dernico/mediaschlingel
def _call(url, param=None):
    # print('call radio with path=%s, param=%s', path, param)
    if param:
        param['key'] = yt_key
        url += '?' + urlencode(param)
    print("call tunein with url: " + url)
    response = Helper.downloadString(url)
    json_data = json.loads(response)
    return json_data
Code Example #25
File: recountReads.py  Project: djhn75/RNAEditor
def fillDicts(files,columns,keys):
    '''
        creates the table and fills the set of keys
    '''
    fileNumber=len(files)
    fileCounter=0
    keySet=()
    idDict={}
    for file in files: #loop through all files
        i=0
        Helper.info("Get information from %s" % file)
        file = open(file)
        
        for line in file: #loop through current file
            line = line.split()
            keyTuple=()
            for k in keys:
                keyTuple=keyTuple+(line[k-1],)
            
            value=[]
            for column in columns: #get the needed values
                try:
                    value.append(line[column-1])
                except IndexError:
                    raise ValueError("Not enough rows in line: %s in file %s" % (" ".join(line),file.name))
            
            if keyTuple in keySet:
                #currentDefaultList=idDict[keyTuple]
                #currentDefaultList[fileCounter]=value
                #idDict[keyTuple]=currentDefaultList
                idDict[keyTuple][fileCounter]=value #replace filecounter List with values from current File
            else:
                currentDefaultList=[["--"]*len(columns)]*len(files) #create default list, with all values empty
                currentDefaultList[fileCounter]=value
                idDict[keyTuple]=currentDefaultList
                keySet=keySet+(keyTuple,)
            
            i+=1
            if i % 1000 == 0:
                Helper.status("%s lines parsed" % i)
        fileCounter+=1
    return idDict,keySet
Code Example #26
File: BgpParser.py  Project: vgiotsas/multilateral
 def parse_summary(ixp, inputfile, ipversion, ixpParam):
     """
     Function to parse a BGP summary output file.
     It prints the ASN->Neighbor IP mapping in a file
     """
     ipToAsn = {}
     addrPos, asnPos, ipcountPos, rtrType = [int(ixpParam["summary"]["ip"]), int(ixpParam["summary"]["asn"]),
                                             int(ixpParam["summary"]["ipCount"]), ixpParam["type"]]
     with open(inputfile, 'rb') as f:
         for line in f:
             # split the line to white spaces
             lineTokens = line.strip().split()
             if len(lineTokens) <= ipcountPos: continue
             interfaces = re.findall(
                 r'(?:\s|^|\(|\[)(?:[\d]{1,3})\.(?:[\d]{1,3})\.(?:[\d]{1,3})\.(?:[\d]{1,3})(?:\s|\)|$|\])', line)
             if len(lineTokens) > addrPos and len(interfaces) > 0:
                 # check if the string that is supposed to be in the position of the address is indeed a valid IP address
                 ip = lineTokens[addrPos]
                 ipType = Helper.getIPNetwork(ip)
                 if str(ipType) == str(ipversion) or (ipType > 0 and int(ipversion) == 10):
                     # check if the string in the position of the ASN is a valid number
                     asn = lineTokens[asnPos]
                     asn = asn.replace("AS", "")
                     if '.' in asn:
                         asn = Helper.convertToAsn32(asn)
                     if Helper.isPositiveInt(asn):
                         # check if the ASN is active and advertises prefixes
                         # often the number of advertised prefixes may be split in total received/best
                         # in this case we want the total which is the first of the two numbers
                         ipcount = lineTokens[ipcountPos]
                         try:
                             if rtrType == "bird":
                                 received = re.findall(r"[\w']+", ipcount)[0]
                             elif rtrType == "quagga":
                                 received = ipcount
                         except:
                             print ipcount
                         # if string represents a valid positive number add asn->ip mapping to the dictionary
                         if Helper.isPositiveInt(received):
                             if ip not in ipToAsn:
                                 ipToAsn[ip] = asn
     return ipToAsn
Code Example #27
File: ItemsPage.py  Project: hrishikeshdok/head2head
 def post(self):
     user = users.get_current_user()
     message=""
     if user:
         category = self.request.get("category")
         isExport = self.request.get("isExport")
         owner = self.request.get("owner")
         
         if isExport:
             self.exportToXml(owner,category)
         else:
             newItemName = self.request.get("item_name").strip()
             ifAlreadyExists  = Item.gql("WHERE name = :1 AND ANCESTOR IS :2",newItemName,Helper.getCategoryKey(user.email(), category))
             
             if (ifAlreadyExists.count() == 0) and newItemName:
                 item = Item(parent=Helper.getCategoryKey(user.email(), category))
                 item.name = self.request.get("item_name")
                 item.wins = 0
                 item.loses = 0
                 item.put()
             else:
                 if newItemName:
                     message = "Item already exists"
                 else:
                     message = "Item name cannot be empty or spaces"
             
             items = db.GqlQuery("SELECT * FROM Item WHERE ANCESTOR IS :1",Helper.getCategoryKey(user.email(), category))
             
             template_values = {
                                'items' : items,
                                'category' : category,
                                'message' : message,
                                'logoutURL' : users.create_logout_url('./'),
                                'user':user,
                                'owner' : owner
                                }
             
             path = os.path.join(os.path.dirname(__file__), './html/items.html')
             self.response.out.write(template.render(path, template_values))
     
     else:
         self.redirect(users.create_login_url(self.request.uri))
Code Example #28
File: VariantSet.py  Project: djhn75/RNAEditor
 def getOverlapsFromBed(self,bedFile,getNonOverlaps=False):
     '''
     returns overlaps from bed file features
     :param bedFile: as string or file
     :param getNonOverlaps: boolean
     :return new variantSet of overlaps 
     '''
     
     if type(bedFile) == str:
         bedFile = open(bedFile)
     elif type(bedFile) != file:
         raise TypeError("bedFile has wrong type, need str or file, %s found" % type(bedFile))
     
     startTime=Helper.getTime()
     Helper.info("[%s] Delete overlaps from %s" %  (startTime.strftime("%c"),bedFile.name) ,self.logFile,self.textField)
     
     variantsByChromosome = self.getVariantListByChromosome() 
     overlapps = set()
     for line in bedFile:
         try:
             sl = line.split("\t") 
             #if "\t" in line else line.split(" ")
             chromosome,start,stop = sl[:3]
             start,stop=(int(start),int(stop))
         except ValueError:
             raise ValueError("Error in line '%s'" % line)
         
         for v in variantsByChromosome[chromosome]:
             if start < v.position < stop:
                 overlapps.add((v.chromosome,v.position,v.ref,v.alt))
                  
     if getNonOverlaps:
         overlapps = set(self.variantDict.keys()) - overlapps #keep only the variants that are not overlapping
     
     newSet={}
     for variantTuple in overlapps:
         #del self.variantDict[variantTuple]
         newSet[variantTuple]=self.variantDict[variantTuple]
     
     Helper.printTimeDiff(startTime, self.logFile,self.textField)
     return newSet
Code Example #29
File: VariantSet.py  Project: djhn75/RNAEditor
 def printClusters(self, outFile):
     
     if type(outFile) == str:
         try:
             outFile=open(outFile,"w")
             
         except IOError:
             Helper.warning("Could not open %s to write Variant" % outFile ,self.logFile,self.textField)
     if type(outFile) != file:   
          raise AttributeError("Invalid outfile type in 'printClusters' (need string or file, %s found)" % type(outFile))
     
     startTime=Helper.getTime()
     Helper.info("[%s] Print Clusters to %s" %  (startTime.strftime("%c"),outFile.name),self.logFile,self.textField)
     
     
     outFile.write("\t".join(["#Chr","Start","Stop","IslandID","GeneID","Gene Symbol","Cluster Length","Number of Editing_sites","Editing_rate","\n"]))
     
     for cluster in self.clusterDict.keys():
         end = max(v.position for v in self.clusterDict[cluster])
         start = min(v.position for v in self.clusterDict[cluster])
         
         length = end - start
         editingRate=float(len(self.clusterDict[cluster]))/float(length)
         geneIdSet=set()
         geneNameSet=set()
         for v in self.clusterDict[cluster]:
             try: 
                 gene = v.attributes['GI'][0][0]
                 if type(gene) == Gene:
                     geneIdSet.add(gene.geneId)
                     geneNameSet |= set(gene.names)
                     #geneList.append(v.attributes['GI'][0][0])
                 else:
                     geneIdSet.add("Intergenic")
                     geneNameSet.add("Intergenic")
             except KeyError:
                 geneIdSet.add("N/A") #when variant has no attribute GI
         
         outFile.write("\t".join([v.chromosome,str(start),str(end),"Island"+str(cluster), #Chr","Start","Stop","Cluster Name",
                                  ",".join(map(str,geneIdSet)),",".join(map(str,geneNameSet)), #"GeneID","Gene Symbol"
                                  str(length),str(len(self.clusterDict[cluster])),'%1.2f'%float(editingRate),"\n"]))
Code Example #30
File: WebCrawler.py  Project: Roknahr/pyCrawler
    def is_allowed(self, url):
        """ Returns ``True`` if allowed (not in robots.txt) - else returns ``False``. """
        disallowed = self.get_disallowed_sites(url, 'GingerWhiskeyCrawler')
        urlpath = Helper.get_path(url)
        result = True
        for path in disallowed:
            if path[-1] == '/':
                path += '*'
            if fnmatch(urlpath, path):
                result = False
                break

        return result
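
A brief usage sketch (hypothetical URL; assumes crawler is an existing WebCrawler instance):

    url = "http://example.com/private/page"
    if crawler.is_allowed(url):
        print("not disallowed by robots.txt: " + url)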
Code Example #31
File: main.py  Project: CAIDA/multilateral
    lgpar = lgParameters.LgParameters()
    now = datetime.datetime.now()
    #currentDate = now.strftime("%d-%m-%Y")
    parameters = lgpar.getLgProfile(asn)

    basename = '.'.join(outputfile.split(".")[:-1])
    extension = outputfile.split(".")[-1]

    if command == "summary":
        filepath = sendQuery(outputfile, asn, parameters, command)
        ip_to_asn = BgpParser.parse_summary(asn, filepath, 4, parameters["output"])
        ipfile = basename+"_addresses."+extension
        for ip in ip_to_asn:
            #print ip_to_asn[ip]
            #Helper.saveToFile(ipfile, ip+" "+ip_to_asn[ip]+"\n", "a+", asn)
            Helper.saveToFile(ipfile, ip+" "+str(ip_to_asn[ip])+"\n", "a+", asn)
    elif command == "neighbor":
        # read the IP addresses/prefixes
        addresses = dict()
        addresses = getIptoASN(inputfile)
        counter = 1 # just for printing progress
        if len(addresses) < 1:
            print "Not enough addresses to query"
        else:
            for address in addresses:
                print str(counter) + ". " + asn + " " + ": " + address
                counter += 1
                filepath = sendQuery(outputfile, asn, parameters, command, address)
    elif command == "bgp":
        addresses = getIptoASN(inputfile2)
        neigh_file = basename+"_addresses."+extension
Code Example #32
File: GuiControll.py  Project: wangdi2014/RNAEditor
    def newAssay(self):
        '''
        Function which starts a new analysis
        '''

        inputTab = self.view.tabMainWindow.widget(0)

        #get Parameters
        parameters = Parameters(inputTab)
        if parameters.paired == True:
            #fastqs=inputTab.dropList.dropFirstTwoItems()
            fastqs = inputTab.dropList.dropFirstItem()
            if fastqs[0] != None:
                if not str(fastqs[0].text()).endswith(".bam"):
                    fastqs += inputTab.dropList.dropFirstItem()
        else:
            fastqs = inputTab.dropList.dropFirstItem()
        """
        check if droplist returned a value
        """
        if parameters.paired == True:
            if fastqs[-1] == None:
                QtGui.QMessageBox.information(
                    self.view, "Warning",
                    "Warning:\nNot enough Sequencing Files for paired-end sequencing!!!\n\nDrop FASTQ-Files to the drop area!"
                )
                return
        if fastqs[0] == None:
            QtGui.QMessageBox.information(
                self.view, "Warning",
                "Warning:\nNo Sequencing Files found!!!\n\nDrop FASTQ-Files to the drop area!"
            )
            return
        sampleName = Helper.getSampleName(str(fastqs[0].text()))
        if sampleName == None:
            QtGui.QMessageBox.information(
                self.view, "Warning",
                "Warning:\nNo valid Sequencing File!!!\n\nDrop FASTQ-Files to the drop area!"
            )
            return

        fastqFiles = []
        for fastq in fastqs:
            fastqFiles.append(str(fastq.text()))

        runTab = RunTab(self)

        #initialize new Thread with new assay
        try:
            assay = RnaEdit(fastqFiles, parameters, runTab.commandBox)
        except Exception as err:
            QtGui.QMessageBox.information(self.view, "Error",
                                          str(err) + "Cannot start Analysis!")
            Helper.error(str(err) + "\n creating rnaEditor Object Failed!",
                         textField=runTab.commandBox)
        currentIndex = self.view.tabMainWindow.count()

        # self.view.tabMainWindow.addTab(self.runTab, "Analysis"+ str(Helper.assayCount))
        self.view.tabMainWindow.addTab(runTab,
                                       sampleName + " " + str(currentIndex))
        Helper.runningThreads.append(assay)

        assay.start()

        self.view.connect(assay, QtCore.SIGNAL("taskDone"), self.openAnalysis)
Code Example #33
File: word2vec.py  Project: iamweiweishi/sentiment
def word2vec():
    # hyperparameters - TODO - place into FLAGS (tensorflow website has examples)
    batch_size = 128  # how many target/context words to get in each batch
    embedding_size = 128  # Dimension of the embedding vector.
    skip_window = 1  # How many words to consider left and right - context size
    num_skips = 2  # How many times to reuse an input to generate a label
    # TAKEN FROM TF WEBSITE EXAMPLE:
    # We pick a random validation set to sample nearest neighbors. here we limit the
    # validation samples to the words that have a low numeric ID, which by
    # construction are also the most frequent.
    valid_size = 16  # Random set of words to evaluate similarity on.
    valid_window = 100  # Only pick dev samples in the head of the distribution.
    valid_examples = np.array(random.sample(range(valid_window), valid_size))
    num_sampled = 64  # Number of negative examples to sample.

    num_steps = 50001  # steps to run for
    steps_per_checkpoint = 50  # save the params every 50 steps.

    # prep work
    basedir = os.getcwd()

    #pull the data and get it into a usable format.
    get_imdb_data(basedir)

    data, count, dictionary, reverse_dictionary = build_dataset(basedir)
    # save the dictionary to file - very important for Data Processor
    Helper.store_stuff(dictionary, "dictionary.pickle", reverse_dictionary,
                       "reverse_dictionary.pickle")
    print('Most common words (+UNK)', count[:5])
    print('Sample data', data[:10])
    batch_tester(data, reverse_dictionary)
    print('three index', dictionary['three'])

    ckpt_path = os.path.join(basedir, 'checkpoints')
    if not os.path.exists(ckpt_path):
        os.makedirs(ckpt_path)
    ckpt_embed = os.path.join(ckpt_path, "embeddings")
    if not os.path.exists(ckpt_embed):
        os.makedirs(ckpt_embed)
    # actual computation
    # TODO refactor this!
    # could follow guidelines here https://danijar.com/structuring-your-tensorflow-models/
    graph = tf.Graph()
    with graph.as_default():
        # variable to track progress
        global_step = tf.Variable(0, trainable=False)

        # Input data.
        train_dataset = tf.placeholder(tf.int32, shape=[batch_size])
        train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
        valid_dataset = tf.constant(valid_examples, dtype=tf.int32)
        with tf.device('/cpu:0'):
            # Variables.
            embeddings = tf.Variable(tf.random_uniform(
                [vocabulary_size, embedding_size], -1.0, 1.0),
                                     name="embeddings")
            nce_weights = tf.Variable(
                tf.truncated_normal([vocabulary_size, embedding_size],
                                    stddev=1.0 / math.sqrt(embedding_size)))
            nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

            # Model.
            # Look up embeddings for inputs.
            # note that the embeddings are Variable params that will
            # be optimised!
            embed = tf.nn.embedding_lookup(embeddings, train_dataset)
        # Compute the nce loss, using a sample of the negative labels each time.
        # tried using sampled_softmax_loss, but performance was worse, so decided
        # to use NCE loss instead. Might be worth some more testing, especially with
        # the hyperparameters (ie num_sampled), to see what gives the best performance.
        # tuning these params is a TODO.
        loss = tf.reduce_mean(
            tf.nn.nce_loss(nce_weights, nce_biases, embed, train_labels,
                           num_sampled, vocabulary_size))

        # PART BELOW LIFTED FROM TF EXAMPLES
        # Optimizer.
        # Note: The optimizer will optimize the nce weights AND the embeddings.
        # This is because the embeddings are defined as a variable quantity and the
        # optimizer's `minimize` method will by default modify all variable quantities
        # that contribute to the tensor it is passed.
        # See docs on `tf.train.Optimizer.minimize()` for more details.
        optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(
            loss, global_step=global_step)

        # Compute the similarity between minibatch examples and all embeddings.
        # We use the cosine distance:
        norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
        normalized_embeddings = embeddings / norm
        valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings,
                                                  valid_dataset)
        similarity = tf.matmul(valid_embeddings,
                               tf.transpose(normalized_embeddings))

    # This helps us terminate early if training started before.
    started_before = False
    with tf.Session(graph=graph) as session:
        # want to save the overall state and the embeddings for later.
        # I think we can do this in one, but I haven't had time to test this yet.
        # TODO make this a bit more efficient, avoid having to save stuff twice.
        # NOTE - this part is very closely coupled with the lstm.py script, as it
        # reads the embeddings from the location specified here. Might be worth
        # relaxing this dependency and passing the save location as a variable param.
        ckpt = tf.train.get_checkpoint_state(ckpt_path)
        saver = tf.train.Saver(tf.all_variables())
        saver_embed = tf.train.Saver({'embeddings': embeddings})
        if ckpt and gfile.Exists(ckpt.model_checkpoint_path):
            print("Reading model parameters from {0}".format(
                ckpt.model_checkpoint_path))
            saver.restore(session, ckpt.model_checkpoint_path)
            print("done")
            started_before = True
        else:
            print("Creating model with fresh parameters.")
            tf.initialize_all_variables().run()
            print('Initialized')
        average_loss = 0
        for step in range(num_steps):
            batch_data, batch_labels = generate_batch(data, batch_size,
                                                      num_skips, skip_window)
            feed_dict = {train_dataset: batch_data, train_labels: batch_labels}
            _, l = session.run([optimizer, loss], feed_dict=feed_dict)
            average_loss += l

            if step >= 10000 and (average_loss / 2000) < 5 and started_before:
                print('early finish as probably loaded from earlier')
                break

            if step % steps_per_checkpoint == 0:
                # save stuff
                checkpoint_path = os.path.join(ckpt_path, "model_ckpt")
                embed_path = os.path.join(ckpt_embed, "embeddings_ckpt")
                saver.save(session, checkpoint_path, global_step=global_step)
                saver_embed.save(session, embed_path)
            if step % 2000 == 0:
                if step > 0:
                    average_loss = average_loss / 2000
                # The average loss is an estimate of the loss over the last 2000 batches.
                print('Average loss at step %d: %f' % (step, average_loss))
                average_loss = 0

                # note that this is expensive (~20% slowdown if computed every 500 steps)
            if step % 10000 == 0:
                sim = similarity.eval()
                for i in range(valid_size):
                    valid_word = reverse_dictionary[valid_examples[i]]
                    top_k = 8  # number of nearest neighbors
                    nearest = (-sim[i, :]).argsort()[1:top_k + 1]
                    log = 'Nearest to %s:' % valid_word
                    for k in range(top_k):
                        close_word = reverse_dictionary[nearest[k]]
                        log = '%s %s,' % (log, close_word)
                    print(log)

        final_embeddings = normalized_embeddings.eval()
        tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000)
        plot_only = 500
        low_dim_embs = tsne.fit_transform(final_embeddings[1:plot_only + 1, :])
        labels = [reverse_dictionary[i] for i in range(plot_only)]
        plot_with_labels(low_dim_embs, labels)
Code Example #34
	def __init__(self, arg):
		self.withDependencies = arg
		self.helper = Helper()
Code Example #35
    def getQueryTermFrequency(self, query):
        query_tf = defaultdict(int)
        for term in query.split():
            query_tf[term] += 1  #count every occurrence of the term
        return query_tf

    def getDocumentsContainingTerm(self, queryTF):
        documents_containing_term = defaultdict()
        for term in queryTF:
            documents_containing_term[term] = r.unigram_inverted_index[term]

        return documents_containing_term

    def performQueryTfIdf(self):
        self.queries = r.get_stemmed_queries()
        for query in self.queries:
            self.queries[query] = r.parse_query(self.queries[query])
            self.queryTF = self.getQueryTermFrequency(self.queries[query])
            self.inverted_list = self.getDocumentsContainingTerm(self.queryTF)
            self.getTfIdf(self.queryTF, self.inverted_list, query,
                          self.queries[query])
            # self.getTfIdf (self.queries[query],query)

        # for query in self.queries:
        # 	self.getTfIdf (self.queries[query],query)


r = Helper()
t = TfIdf()
Code Example #36
import AddressTweet
import operator
import os
from collections import defaultdict
from Helper import Helper

if __name__ == '__main__':
    helper = Helper()

    addressTweet = AddressTweet.AddressTweet()
    # print "get hashtags..."
    # addressTweet.getHashtags('NYCattack')
    # print "get userIdName..."
    # addressTweet.getUserName('NYCattack')

    top10HashTags = helper.loadPickle(
        os.path.join('NYCattack', "top10HashTags.pkl"))
    addressTweet.getPlot(top10HashTags, 'NYCattack', "top10HashTags.png", True)
    print "top10HashTags.png has been saved..."

    top10UserName = helper.loadPickle(
        os.path.join('NYCattack', "top10UserName.pkl"))
    addressTweet.getPlot(top10UserName, 'NYCattack', "top10UserName.png",
                         False)
    print "top10UserName.png has been saved..."
Code Example #37
    def splitByBed(self, bedFile):
        '''
        returns overlaps and nonOverlaps from bed file features
        :param bedFile: as string or file
        :param getNonOverlaps: boolean
        '''

        if type(bedFile) == str:
            bedFile = open(bedFile)
        elif type(bedFile) != file:
            raise TypeError(
                "bedFile has wrong type, need str or file, %s found" %
                type(bedFile))

        startTime = Helper.getTime()
        Helper.info(
            "[%s] Split Variants by Bed File %s" %
            (startTime.strftime("%c"), bedFile.name), self.logFile,
            self.textField)

        variantsByChromosome = self.getVariantListByChromosome()
        overlapSet = set()
        i = 0
        for line in bedFile:

            try:
                sl = line.split("\t")
                #if "\t" in line else line.split(" ")
                chromosome, start, stop = sl[:3]
                start, stop = (int(start), int(stop))
            except ValueError:
                raise ValueError("Error in line '%s'" % line)

            for v in variantsByChromosome[chromosome]:
                if start < v.position < stop:
                    overlapSet.add((v.chromosome, v.position, v.ref, v.alt))
            i += 1
            if i % 100000 == 0:
                Helper.status("%s Bed Feautes parsed" % i, self.logFile,
                              self.textField, "grey")

        Helper.info("finished parsing Bed file", self.logFile, self.textField)
        Helper.printTimeDiff(startTime, self.logFile, self.textField)

        #nonOverlapSet = set(self.variantDict.keys()) - overlapSet #delete all accept the ones which are overlapping

        overlaps = {
            key: self.variantDict[key]
            for key in self.variantDict if key in overlapSet
        }

        Helper.info("finished creating overlaps", self.logFile, self.textField)
        Helper.printTimeDiff(startTime, self.logFile, self.textField)

        nonOverlaps = {
            key: self.variantDict[key]
            for key in self.variantDict if key not in overlapSet
        }
        """
        overlaps={}
        for variantTuple in overlapSet:
            #del self.variantDict[variantTuple]
            overlaps[variantTuple]=self.variantDict[variantTuple]
        
        nonOverlaps={}
        for variantTuple in nonOverlapSet:
            nonOverlaps[variantTuple]=self.variantDict
        """

        Helper.printTimeDiff(startTime, self.logFile, self.textField)
        return overlaps, nonOverlaps
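
A minimal usage sketch (hypothetical BED path and instance name; variantSet stands for an object of the class shown above):

    # overlaps and nonOverlaps are dicts keyed by (chromosome, position, ref, alt) tuples
    overlaps, nonOverlaps = variantSet.splitByBed("regions.bed")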
Code Example #38
import Helper.Helper as h
import csv
import datetime
import os

URLCRIPTO = 'https://m.investing.com/crypto/'
URLDOLAR = 'https://m.investing.com/currencies/usd-brl'
h.get(URLCRIPTO)

hora = datetime.datetime.now().strftime("%d-%m-%Y %H:%M")
lista = h.listxpath('/html/body/div[1]/div[1]/section/div/div/div/table/tbody/tr')
tamanholista = len(lista)

for i in range(0,tamanholista):
    rank = h.xpath('/html/body/div[1]/div[1]/section/div/div/div/table/tbody/tr['+str(i+1)+']/td[1]').text
    nomeMoeda = h.xpath('/html/body/div[1]/div[1]/section/div/div/div/table/tbody/tr['+str(i+1)+']/td[2]').text
    valor = str(h.xpath('/html/body/div[1]/div[1]/section/div/div/div/table/tbody/tr['+str(i+1)+']/td[3]').text)
    valor = valor.replace(',','')

    file_exists = os.path.isfile("Cripto.csv")
    with open("Cripto.csv", 'a', newline='') as saida:
        headers = ['Rank', 'NomeMoeda', 'Valor-USD','Hora']
        writer = csv.DictWriter(saida, delimiter=';', lineterminator='\n', fieldnames=headers)
        if not file_exists:
            writer.writeheader()
        writer.writerow({'Rank': rank, 'NomeMoeda': nomeMoeda, 'Valor-USD': valor, 'Hora': hora})



h.get(URLDOLAR)
cotacaoDolar = str(h.xpath('//*[@id="siteWrapper"]/div[1]/section[2]/div[4]/div[2]/span[1]').text)
Code Example #39
File: RNAEditor.py  Project: makijata/RNAEditor
    def printParameters(self):

        Helper.info("*** Start RnaEditor with: ***", self.logFile,self.textField) 
        if self.fastqFiles[0].endswith(".bam"):
            Helper.info("\t Bam File: " + self.fastqFiles[0],self.logFile,self.textField)
        else:
            if self.params.paired:
                Helper.info("\t FastQ-File_1: " + self.fastqFiles[0],self.logFile,self.textField)
                Helper.info("\t FastQ-File_2: " + self.fastqFiles[1],self.logFile,self.textField)
            else:
                Helper.info("\t FastQ-File: " + self.fastqFiles[0],self.logFile,self.textField)
        Helper.info("\t outfilePrefix:" + self.params.output,self.logFile,self.textField)
        Helper.info("\t refGenome:" + self.params.refGenome,self.logFile,self.textField)
        Helper.info("\t dbsnp:" + self.params.dbsnp,self.logFile,self.textField)
        Helper.info("\t sourceDir:" + self.params.sourceDir,self.logFile,self.textField)
        Helper.info("\t threads:" + self.params.threads,self.logFile,self.textField)
        Helper.info("\t maxDiff:" + self.params.maxDiff,self.logFile,self.textField)
        Helper.info("\t seedDiff:" + self.params.seedDiff,self.logFile,self.textField)
        Helper.info("\t paired:" + str(self.params.paired),self.logFile,self.textField)
        Helper.info("\t keepTemp:" + str(self.params.keepTemp),self.logFile,self.textField)
        Helper.info("\t overwrite:" + str(self.params.overwrite),self.logFile,self.textField)
        Helper.info("",self.logFile,self.textField)
コード例 #40
0
import pymysql

from Helper import Helper

correct_count = 0
not_correct_count = 0
wrong_correct_count = 0
wrong_not_correct_count = 0
wrong_data = []

conn = pymysql.connect(host='localhost',
                       port=3306,
                       user='******',
                       passwd='',
                       db='small_rekomendacyjny')
cur = conn.cursor()
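# fetch every rating flagged with checked = 5 and validate each one with Helper.one_check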
cur.execute("SELECT * FROM help WHERE checked = 5")
svc = Helper.train_svm()
for row in cur:
    print('########################################################')
    print(row)
    correct = Helper.one_check(mov_id=row[1], us_id=row[0], rat=float(row[2]))
    print(correct)
    if not correct:
        if row[4] == '0':
            print('NOT CORRECT!!! OK')
            not_correct_count += 1
            wrong_data.append([row[0], row[1], float(row[2])])
            #Helper.repare_one(svc=svc, movie_id=row[1], user_id=row[0], rating=float(row[2]))
        else:
            print("WRONG!!! SHOULD BE CORRECT!")
            wrong_not_correct_count += 1
            wrong_data.append([row[0], row[1], float(row[2])])
コード例 #41
0
    backbones = []
    SnmpProtocol = SnmpProtocol()
    for record in records:
        switches = []
        backbone = Backbone(str(record[0]), record[1], record[2], record[3])
        backbones.append(backbone)

for backbone in backbones:
    print(backbone.id)
    print(backbone.binaad)
    print(backbone.ip)
    switches = backbone.switches
    for switchsingle in switches:
        stmt2 = SnmpProtocol.execute(switchsingle.ip,'1.3.6.1.2.1.17.7.1.2.2.1.2','-v "INTEGER: 418"')
        eachline = stmt2.splitlines()
        print("SwitchIp:"+switchsingle.ip)
        clients = []
        for i in eachline:
            #convert the MAC address and extract the VLAN
            rtr = Helper.decimaltohex(i)
            mac = rtr[0]
            vlan = str(rtr[1])
            #find the port as a string
            port = Helper.findport(i,switchsingle.ip,SnmpProtocol)
            clients.append(Client(mac,port,vlan))
        switchsingle.setClients(clients)
        for client in clients:
            print("Mac: "+client.mac + " Port: "+client.port+ " Vlan: "+client.vlan)

コード例 #42
0
#!/usr/bin/env python
# _*_ coding:utf-8 _*_

__author__ = 'Hongrui'

import paramiko, re, os, sys, time
import threading
from Helper import Helper
Helper = Helper()

username = Helper.get_credential()['username']
passwd = Helper.get_credential()['password']
host_info = Helper.get_hostinfo()


#Multi-threads to execute the commands
def thread_run(cmd):
    threads = []
    print 'Now Beginning......'
    for v in host_info.values():
        print 'ip address:', v[0]
        ip = v[0]
        t = threading.Thread(target=ssh2, args=(ip, username, passwd, cmd))
        threads.append(t)
    for t in threads:
        t.setDaemon(True)
        t.start()
    for t in threads:
        t.join()
    print 'All Command Executed......\n'
コード例 #43
0
    def configure(self):
        helper = Helper()
        if not helper.checkFile('/etc/bash_completion.d/git-completion.bash'):
            print "-- add bash completion"
            helper.wget(
                'https://raw.githubusercontent.com/git/git/master/contrib/completion/git-completion.bash',
                '/etc/bash_completion.d/')

        if 'name' in self.attrs:
            print "-- set your name in git config"
            helper.execute('git config --global user.name "' +
                           self.attrs['name'] + '"')

        if 'email' in self.attrs:
            fileName = helper.homeFolder() + '.ssh/id_rsa'
            print "-- set your email in git config"
            helper.execute('git config --global user.email "' +
                           self.attrs['email'] + '"')
            if 'passphrase' in self.attrs and len(
                    self.attrs['passphrase']) > 4:
                print "-- create ssh key for auto-authorization (add string below to https://github.com/settings/ssh)"
                if not helper.checkFile(fileName):
                    helper.execute('mkdir ' + helper.homeFolder() + '.ssh')
                    helper.execute('ssh-keygen -f "' + fileName + '" -N "' +
                                   self.attrs['passphrase'] + '" -t rsa -C "' +
                                   self.attrs['email'] + '"')
                print helper.execute('cat ' + fileName + '.pub')
コード例 #44
0
ファイル: TreeDriver.py プロジェクト: chriswtanner/CRETE
    def __init__(self, scope, num_dirs, opt="adagrad", lr=0.025):
        print("num_dirs:", num_dirs)
        sub_dir = "ecb_" + scope + "/"

        # init stuff
        print("TORCH VERSION:", torch.__version__)
        global args
        self.args = config.parse_known_args()

        self.args.cuda = self.args.cuda and torch.cuda.is_available()
        device = torch.device("cuda:0" if self.args.cuda else "cpu")
        torch.manual_seed(self.args.seed)
        random.seed(self.args.seed)

        print("TREELSTM:", opt, "lr:", lr)

        # paths
        train_dir = os.path.join(self.args.data, str(num_dirs), 'train/',
                                 sub_dir)
        dev_dir = os.path.join(self.args.data, str(num_dirs), 'dev/', sub_dir)
        test_dir = os.path.join(self.args.data, str(num_dirs), 'test/',
                                sub_dir)

        print("train_dir:", train_dir)
        print("dev_dir:", dev_dir)

        # builds vocabulary
        sick_vocab_file = Helper.build_entire_vocab(
            os.path.join(self.args.data, str(num_dirs), 'sick.vocab'),
            train_dir, dev_dir, test_dir)
        vocab = Vocab(filename=sick_vocab_file,
                      data=[
                          Constants.PAD_WORD, Constants.UNK_WORD,
                          Constants.BOS_WORD, Constants.EOS_WORD
                      ])
        print('==> SICK vocabulary size : %d ' % vocab.size())

        # loads SICKDataset: Trees, sentences, and labels
        self.train_dataset = Helper.load_data(
            train_dir,
            os.path.join(self.args.data, str(num_dirs), 'sick_train.pth'),
            vocab, self.args.num_classes)
        self.dev_dataset = Helper.load_data(
            dev_dir, os.path.join(self.args.data, str(num_dirs),
                                  'sick_dev.pth'), vocab,
            self.args.num_classes)
        self.test_dataset = Helper.load_data(
            test_dir,
            os.path.join(self.args.data, str(num_dirs), 'sick_test.pth'),
            vocab, self.args.num_classes)

        # creates the TreeLSTM
        model = SimilarityTreeLSTM(vocab.size(), self.args.input_dim, self.args.mem_dim, self.args.hidden_dim, \
          self.args.num_classes, self.args.sparse, self.args.freeze_embed, vocab)
        criterion = nn.KLDivLoss()  #nn.CrossEntropyLoss()

        # loads glove embeddings
        emb = Helper.load_embeddings(
            self.args,
            os.path.join(self.args.data, str(num_dirs), 'sick_embed.pth'),
            vocab, device)

        # sets up the model
        model.emb.weight.data.copy_(
            emb)  # plug these into embedding matrix inside model
        model.to(device)
        criterion.to(device)
        # select the optimizer according to the 'opt' argument before it is overwritten
        if opt == "adam":
            optimizer = optim.Adam(filter(lambda p: p.requires_grad, \
              model.parameters()), lr=lr)
        else:
            optimizer = optim.Adagrad(filter(lambda p: p.requires_grad, \
               model.parameters()), lr=lr, weight_decay=self.args.wd)

        self.metrics = Metrics(self.args.num_classes)

        # create trainer object for training and testing
        self.trainer = Trainer(self.args, model, criterion, optimizer, device, vocab)
コード例 #45
0
 def printAttributes(self):
     print
     Helper.info("*** MAP READS WITH FOLLOWING ATTRIBUTES ***",
                 self.rnaEdit.logFile, self.rnaEdit.textField)
     if self.rnaEdit.params.paired:
         Helper.info("\t FastQ-File_1: " + self.fastqFile1,
                     self.rnaEdit.logFile, self.rnaEdit.textField)
         Helper.info("\t FastQ-File_2: " + self.fastqFile2,
                     self.rnaEdit.logFile, self.rnaEdit.textField)
     else:
         Helper.info("\t FastQ-File: " + self.fastqFile,
                     self.rnaEdit.logFile, self.rnaEdit.textField)
     Helper.info("\t outfilePrefix:" + self.rnaEdit.params.output,
                 self.rnaEdit.logFile, self.rnaEdit.textField)
     Helper.info("\t refGenome:" + self.rnaEdit.params.refGenome,
                 self.rnaEdit.logFile, self.rnaEdit.textField)
     Helper.info("\t dbsnp:" + self.rnaEdit.params.dbsnp,
                 self.rnaEdit.logFile, self.rnaEdit.textField)
     Helper.info("\t sourceDir:" + self.rnaEdit.params.sourceDir,
                 self.rnaEdit.logFile, self.rnaEdit.textField)
     Helper.info("\t threads:" + self.rnaEdit.params.threads,
                 self.rnaEdit.logFile, self.rnaEdit.textField)
     Helper.info("\t maxDiff:" + self.rnaEdit.params.maxDiff,
                 self.rnaEdit.logFile, self.rnaEdit.textField)
     Helper.info("\t seedDiff:" + self.rnaEdit.params.seedDiff,
                 self.rnaEdit.logFile, self.rnaEdit.textField)
     Helper.info("\t paired:" + str(self.rnaEdit.params.paired),
                 self.rnaEdit.logFile, self.rnaEdit.textField)
     Helper.info("\t keepTemp:" + str(self.rnaEdit.params.keepTemp),
                 self.rnaEdit.logFile, self.rnaEdit.textField)
     Helper.info("\t overwrite:" + str(self.rnaEdit.params.overwrite),
                 self.rnaEdit.logFile, self.rnaEdit.textField)
     Helper.info("", self.rnaEdit.logFile, self.rnaEdit.textField)
コード例 #46
0
ファイル: RNAEditor.py プロジェクト: makijata/RNAEditor
    def checkDependencies(self):
        """checks if all files are there
        if all programs are installed properly and if the output directory is writable"""
        try:
            self.logFile=open(self.params.output + ".log","w+")
        except IOError:
            Helper.error("Cannot open Log File", textField=self.textField)

        if type(self.fastqFiles) == list:
            self.fastqFiles=self.fastqFiles
        elif type(self.fastqFiles) == str:
            self.fastqFiles=[self.fastqFiles]
        else:
            Helper.error("FastQ File has wrong variable type",self.logFile,self.textField)
        
        for file in self.fastqFiles:
            if not os.path.isfile(file):
                Helper.error("Could not find: %s" %file,self.logFile,self.textField)
            
        '''
        Checks the existence of the necessary packages and tools
        :param sourceDir: folder which contains all the software
        '''
        Helper.newline(1)
        Helper.info("CHECK DEPENDENCIES",self.logFile,self.textField)
        
        #check if all tools are there
        if not os.path.isfile(self.params.sourceDir+"bwa"):
            Helper.error("BWA not found in %s" % self.params.sourceDir,self.logFile,self.textField)
        if not os.path.isfile(self.params.sourceDir+"picard-tools/SortSam.jar"):
            Helper.error("SortSam.jar not found in %s" % self.params.sourceDir+"picard-tools",self.logFile,self.textField)
        if not os.path.isfile(self.params.sourceDir+"picard-tools/MarkDuplicates.jar"):
            Helper.error("MarkDuplicates.jar not found in %s" % self.params.sourceDir+"picard-tools",self.logFile,self.textField)
        if not os.path.isfile(self.params.sourceDir+"GATK/GenomeAnalysisTK.jar"):
            Helper.error("GenomeAnalysisTK.jar not found in %s" % self.params.sourceDir+"GATK/",self.logFile,self.textField)
        if not os.path.isfile(self.params.sourceDir+"blat"):
            Helper.error("blat not found in %s" % self.params.sourceDir,self.logFile,self.textField)
        if not os.path.isfile(self.params.sourceDir+"samtools"):
            Helper.error("samtools not found in %s" % self.params.sourceDir,self.logFile,self.textField)
        if not os.system("java -version")==0:
            Helper.error("Java could not be found, Please install java",self.logFile,self.textField)
        
        
        
        #check if all files are there
        if not os.path.isfile(self.params.refGenome):
            Helper.error("Could not find Reference Genome in %s: " % self.params.refGenome,self.logFile,self.textField)
        
        # Files for BWA
        if not os.path.isfile(self.params.refGenome+".amb"):
            Helper.warning("Could not find %s.amb" % self.params.refGenome,self.logFile,self.textField)
            Helper.error("run: 'bwa index %s' to create it" % self.params.refGenome,self.logFile,self.textField)
        if not os.path.isfile(self.params.refGenome+".ann"):
            Helper.warning("Could not find %s.ann" % self.params.refGenome,self.logFile,self.textField)
            Helper.error("run: 'bwa index %s' to create it" % self.params.refGenome,self.logFile,self.textField)
        if not os.path.isfile(self.params.refGenome+".bwt"):
            Helper.warning("Could not find %s.bwt" % self.params.refGenome,self.logFile,self.textField)
            Helper.error("run: 'bwa index %s' to create it" % self.params.refGenome,self.logFile,self.textField)
        if not os.path.isfile(self.params.refGenome+".pac"):
            Helper.warning("Could not find %s.pac" % self.params.refGenome,self.logFile,self.textField)
            Helper.error("run: 'bwa index %s' to create it" % self.params.refGenome,self.logFile,self.textField)
        if not os.path.isfile(self.params.refGenome+".sa"):
            Helper.warning("Could not find %s.sa" % self.params.refGenome,self.logFile,self.textField)
            Helper.error("run: 'bwa index %s' to create it" % self.params.refGenome,self.logFile,self.textField)

        
        #Files for GATK
        
        
        if self.params.refGenome.endswith("fasta"):
            if not os.path.isfile(self.params.refGenome.replace(".fasta",".dict")):
                Helper.warning("Could not find %s" % self.params.refGenome.replace(".fasta",".dict"),self.logFile,self.textField)
                Helper.error("run: 'java -jar %spicard-tools/CreateSequenceDictionary.jar R=%s  O= %s' to create it" % (self.params.sourceDir,self.params.refGenome,self.params.refGenome.replace(".fasta",".dict")),self.logFile,self.textField)
        elif self.params.refGenome.endswith("fa"):
            if not os.path.isfile(self.params.refGenome.replace(".fa",".dict")):
                Helper.warning("Could not find %s" % self.params.refGenome.replace(".fa",".dict"),self.logFile,self.textField)
                Helper.error("run: 'java -jar %spicard-tools/CreateSequenceDictionary.jar R=%s  O= %s' to create it" % (self.params.sourceDir,self.params.refGenome,self.params.refGenome.replace(".fa",".dict")),self.logFile,self.textField)
        else:
            Helper.error("RefGenome has wrong suffix. Either '.fa' or '.fasta'")
        if not os.path.isfile(self.params.refGenome+".fai"):
            Helper.warning("Could not find %s.fai" % self.params.refGenome,self.logFile,self.textField)
            Helper.error("run: 'samtools faidx %s' to create it" % self.params.refGenome,self.logFile,self.textField)
    
        #SNP databases
        if not os.path.isfile(self.params.dbsnp):
            Helper.error("Could not find dbSNP database %s: " % self.params.dbsnp,self.logFile,self.textField)
        if not os.path.isfile(self.params.hapmap) and self.params.hapmap != "None":
            Helper.error("Could not find Hapmap database %s: " % self.params.hapmap,self.logFile,self.textField)
        if not os.path.isfile(self.params.omni) and self.params.omni != "None":
            Helper.error("Could not find Omni database %s: " % self.params.omni,self.logFile,self.textField)
        if not os.path.isfile(self.params.esp) and self.params.esp != "None":
            Helper.error("Could not find ESP database %s: " % self.params.esp,self.logFile,self.textField)
            
        #region Files
        if not os.path.isfile(self.params.aluRegions):
            Helper.error("Could not find %s: " % self.params.aluRegions,self.logFile,self.textField)
            
        if not os.path.isfile(self.params.gtfFile):
            Helper.error("Could not find %s: " % self.params.gtfFile,self.logFile,self.textField)

        
        Helper.info("Dependencies satisfied", self.logFile, self.textField)
コード例 #47
0
def checkDependencies(args):
    '''
    Checks the existence of the necessary packages and tools
    :param sourceDir: folder which contains all the software
    '''
    Helper.newline(1)
    Helper.info("CHECK DEPENDENCIES")

    #check if all tools are there
    if not os.path.isfile(args.sourceDir + "bwa"):
        Helper.error("BWA not found in %s" % args.sourceDir)
    if not os.path.isfile(args.sourceDir + "picard-tools/SortSam.jar"):
        Helper.error("SortSam.jar not found in %s" % args.sourceDir +
                     "picard-tools")
    if not os.path.isfile(args.sourceDir + "picard-tools/MarkDuplicates.jar"):
        Helper.error("MarkDuplicates.jar not found in %s" % args.sourceDir +
                     "picard-tools")
    if not os.path.isfile(args.sourceDir + "GATK/GenomeAnalysisTK.jar"):
        Helper.error("GenomeAnalysisTK.jar not found in %s" % args.sourceDir +
                     "GATK/")
    if not os.path.isfile(args.sourceDir + "samtools"):
        Helper.error("samtools not found in %s" % args.sourceDir)
    if not os.system("java -version") == 0:
        Helper.error("Java could not be found, Please install java")

    #check if all files are there
    if not os.path.isfile(args.RefGenome):
        Helper.error("Could not find Reference Genome in %s: " %
                     args.RefGenome)
    # Files for BWA
    if not os.path.isfile(args.RefGenome + ".amb"):
        Helper.error("Could not find %s.amb" % args.RefGenome)
        Helper.error("run: 'bwa index %s' to create it" % args.RefGenome)
    if not os.path.isfile(args.RefGenome + ".ann"):
        Helper.error("Could not find %s.ann" % args.RefGenome)
        Helper.error("run: 'bwa index %s' to create it" % args.RefGenome)
    if not os.path.isfile(args.RefGenome + ".bwt"):
        Helper.error("Could not find %s.bwt" % args.RefGenome)
        Helper.error("run: 'bwa index %s' to create it" % args.RefGenome)
    if not os.path.isfile(args.RefGenome + ".pac"):
        Helper.error("Could not find %s.pac" % args.RefGenome)
        Helper.error("run: 'bwa index %s' to create it" % args.RefGenome)
    if not os.path.isfile(args.RefGenome + ".sa"):
        Helper.error("Could not find %s.sa" % args.RefGenome)
        Helper.error("run: 'bwa index %s' to create it" % args.RefGenome)

    #Files for GATK
    if not os.path.isfile(args.RefGenome.replace(".fasta", ".dict")):
        Helper.error("Could not find %s" %
                     args.RefGenome.replace(".fasta", ".dict"))
        Helper.error(
            "run: 'java -jar %s/picard-tools/CreateSequenceDictionary.jar R=%s  O= %s' to create it"
            % (args.sourceDir, args.RefGenome,
               args.RefGenome.replace(".fasta", ".dict")))
    if not os.path.isfile(args.RefGenome + ".fai"):
        Helper.error("Could not find %s.fai" % args.RefGenome)
        Helper.error("run: 'samtools faidx %s' to create it" % args.RefGenome)

    #SNP databases
    if not os.path.isfile(args.dbsnp):
        Helper.error("Could not find %s: " % args.dbsnp)
コード例 #48
0
ファイル: RNAEditor.py プロジェクト: makijata/RNAEditor
 def stopSafely(self):
     self.quit()
     Helper.info("Analysis was stopped by User", self.logFile, self.textField)
コード例 #49
0
ファイル: CallEditingSites.py プロジェクト: aaiezza/RNAEditor
    def startAnalysis(self):
        '''Proceeds all the steps to detect editing Sites from a bam File
        
        @return: 0 on success and 1 if analysis was canceled by user
        '''
        '''check if result file already exists'''
        if os.path.isfile(self.rnaEdit.params.output + ".editingSites.clusters"
                          ) and self.rnaEdit.params.overwrite == False:
            Helper.status("\t [SKIP] Final result file already exist",
                          self.rnaEdit.logFile, self.rnaEdit.textField,
                          "green")
            return 1

        #Rough variant calling with GATK
        self.printAttributes()

        #create transcriptome from GTF-File
        #startTime = Helper.getTime()
        #Helper.info(" [%s] Parsing Gene Data from %s" % (startTime.strftime("%c"),self.rnaEdit.params.gtfFile),self.rnaEdit.logFile,self.rnaEdit.textField)

        #duration = Helper.getTime() -startTime
        #Helper.info(" Finished parsing in %s" % (str(duration)),self.rnaEdit.logFile,self.rnaEdit.textField)

        vcfFile = self.rnaEdit.params.output + ".vcf"
        cmd = [
            "java", "-Xmx12G", "-jar",
            self.rnaEdit.params.sourceDir + "GATK/GenomeAnalysisTK.jar", "-T",
            "UnifiedGenotyper", "-R", self.rnaEdit.params.refGenome, "-glm",
            "SNP", "-I", self.bamFile, "-D", self.rnaEdit.params.dbsnp, "-o",
            vcfFile, "-metrics", self.rnaEdit.params.output + ".snp.metrics",
            "-nt", self.rnaEdit.params.threads, "-l", "ERROR",
            "-stand_call_conf", self.rnaEdit.params.standCall,
            "-stand_emit_conf", self.rnaEdit.params.standEmit, "-A",
            "Coverage", "-A", "AlleleBalance", "-A", "BaseCounts"
        ]
        #print cmd
        Helper.proceedCommand("Call variants", cmd, self.bamFile, vcfFile,
                              self.rnaEdit)

        #################################
        ###   Delete known SNPs!!!    ###
        #################################
        #check if file already exists
        if not os.path.isfile(self.rnaEdit.params.output + ".noSNPs.vcf"
                              ) or self.rnaEdit.params.overwrite == True:
            #read in initial SNPs
            variants = VariantSet(vcfFile, self.rnaEdit.logFile,
                                  self.rnaEdit.textField)
            '''delete SNPs from dbSNP'''
            variants.deleteOverlapsFromVcf(self.rnaEdit.params.dbsnp)
            '''delete variants from 1000 Genome Project'''
            if self.rnaEdit.params.omni != "None":
                variants.deleteOverlapsFromVcf(self.rnaEdit.params.omni)
            '''delete variants from UW exome calls'''
            if self.rnaEdit.params.esp != "None":
                variants.deleteOverlapsFromVcf(self.rnaEdit.params.esp)
            '''annotate all Variants'''
            #variants.annotateVariantDict(self.genome)
            '''save variants if something goes wrong'''
            variants.printVariantDict(self.rnaEdit.params.output +
                                      ".noSNPs.vcf")
        else:
            if not os.path.isfile(self.rnaEdit.params.output +
                                  ".noReadEdges.vcf"):
                variants = VariantSet(
                    self.rnaEdit.params.output + ".noSNPs.vcf",
                    self.rnaEdit.logFile, self.rnaEdit.textField)

        ###############################################
        ###   Delete variants from read edges!!!    ###
        ###############################################
        if not os.path.isfile(self.rnaEdit.params.output + ".noReadEdges.vcf"
                              ) or self.rnaEdit.params.overwrite == True:
            '''erase artificial mismatches at read-edges from variants'''
            variants.removeEdgeMismatches(self.bamFile,
                                          self.rnaEdit.params.edgeDistance, 25)
            #self.removeEdgeMissmatches(variants, self.bamFile, self.rnaEdit.params.edgeDistance, 25)
            '''save variants if something goes wrong'''
            variants.printVariantDict(self.rnaEdit.params.output +
                                      ".noReadEdges.vcf")
        else:
            if not os.path.isfile(self.rnaEdit.params.output +
                                  ".alu.vcf") or not os.path.isfile(
                                      self.rnaEdit.params.output +
                                      ".nonAlu.vcf"):
                variants = VariantSet(
                    self.rnaEdit.params.output + ".noReadEdges.vcf",
                    self.rnaEdit.logFile, self.rnaEdit.textField)

        ###############################################
        ###   split Alu- and non-Alu Variants!!!    ###
        ###############################################

        if (not os.path.isfile(self.rnaEdit.params.output + ".alu.vcf") or
                not os.path.isfile(self.rnaEdit.params.output + ".nonAlu.vcf")
            ) or self.rnaEdit.params.overwrite == True:
            '''get non-Alu Variants'''
            nonAluVariants = copy(variants)
            #nonAluVariants.variantDict=variants.getOverlapsFromBed(self.rnaEdit.params.aluRegions,getNonOverlaps=True)
            '''get Alu Variants'''
            aluVariants = copy(variants)
            #aluVariants.variantDict=variants.getOverlapsFromBed(self.rnaEdit.params.aluRegions,getNonOverlaps=False)
            aluVariants.variantDict, nonAluVariants.variantDict = variants.splitByBed(
                self.rnaEdit.params.aluRegions)
            aluVariants.printVariantDict(self.rnaEdit.params.output +
                                         ".alu.vcf")
            nonAluVariants.printVariantDict(self.rnaEdit.params.output +
                                            ".nonAlu.vcf")
        else:
            aluVariants = VariantSet(self.rnaEdit.params.output + ".alu.vcf",
                                     self.rnaEdit.logFile,
                                     self.rnaEdit.textField)
            if not os.path.isfile(self.rnaEdit.params.output +
                                  ".noSpliceJunction.vcf"):
                nonAluVariants = VariantSet(
                    self.rnaEdit.params.output + ".nonAlu.vcf",
                    self.rnaEdit.logFile, self.rnaEdit.textField)

        #print out variants from Alu regions
        #

        ##############################################
        ###   proceed with non-Alu reads only!!!    ##
        ##############################################

        ##############################################
        ###   Remove intronic Splice junction!!!    ##
        ##############################################
        self.genome = Genome(self.rnaEdit.params.gtfFile, self.rnaEdit.logFile,
                             self.rnaEdit.textField)
        #erase variants from intronic splice junctions
        if not os.path.isfile(self.rnaEdit.params.output +
                              ".noSpliceJunction.vcf"
                              ) or self.rnaEdit.params.overwrite == True:
            self.removeIntronicSpliceJunctions(
                nonAluVariants,
                self.genome,
                distance=self.rnaEdit.params.intronDistance)
            nonAluVariants.printVariantDict(self.rnaEdit.params.output +
                                            ".noSpliceJunction.vcf")
        else:
            if not os.path.isfile(self.rnaEdit.params.output + ".noHomo.vcf"):
                nonAluVariants = VariantSet(
                    self.rnaEdit.params.output + ".noSpliceJunction.vcf",
                    self.rnaEdit.logFile, self.rnaEdit.textField)

        ##############################################
        ### erase variants from homopolymers!!! ##
        ##############################################
        if not os.path.isfile(self.rnaEdit.params.output + ".noHomo.vcf"
                              ) or self.rnaEdit.params.overwrite == True:
            self.removeHomopolymers(nonAluVariants, self.rnaEdit.params.output,
                                    4)
            nonAluVariants.printVariantDict(self.rnaEdit.params.output +
                                            ".noHomo.vcf")
        else:
            if not os.path.isfile(self.rnaEdit.params.output + ".noBlat.vcf"):
                nonAluVariants = VariantSet(
                    self.rnaEdit.params.output + ".noHomo.vcf",
                    self.rnaEdit.logFile, self.rnaEdit.textField)

        ##############################################
        ###   erase variants from non-uniquely mapped reads!!!   ##
        ##############################################
        if not os.path.isfile(self.rnaEdit.params.output + ".noBlat.vcf"
                              ) or self.rnaEdit.params.overwrite == True:
            blatOutfile = self.rnaEdit.params.output + ".noBlat.vcf"
            self.blatSearch(nonAluVariants, blatOutfile, 25, 2)

            #print nonAlu variants
            nonAluVariants.printVariantDict(self.rnaEdit.params.output +
                                            ".noBlat.vcf")
        else:
            if not os.path.isfile(self.rnaEdit.params.output +
                                  ".editingSites.nonAlu.vcf"):
                nonAluVariants = VariantSet(
                    self.rnaEdit.params.output + ".noBlat.vcf",
                    self.rnaEdit.logFile, self.rnaEdit.textField)
                #nonAluVariants.deleteNonEditingBases()
                #nonAluVariants.printVariantDict(self.rnaEdit.params.output+".editingSites.nonAlu.vcf")
            else:
                nonAluVariants = VariantSet(
                    self.rnaEdit.params.output + ".editingSites.nonAlu.vcf",
                    self.rnaEdit.logFile, self.rnaEdit.textField)
        #nonAluVariants.printGeneList(self.genome,self.rnaEdit.params.output+".nonAlu.gvf", printSummary=True)

        #print nonAlu editing Sites
        nonAluVariants.deleteNonEditingBases()
        nonAluVariants.annotateVariantDict(self.genome)
        nonAluVariants.printVariantDict(self.rnaEdit.params.output +
                                        ".editingSites.nonAlu.vcf")
        nonAluVariants.printGeneList(self.genome,
                                     self.rnaEdit.params.output +
                                     ".editingSites.nonAlu.gvf",
                                     printSummary=True)
        nonAluVariants.createClusters(eps=50, minSamples=5)
        nonAluVariants.printClusters(self.rnaEdit.params.output +
                                     ".editingIslands.bed")
        #print Alu editing Sites
        aluVariants.deleteNonEditingBases()
        aluVariants.annotateVariantDict(self.genome)
        aluVariants.printVariantDict(self.rnaEdit.params.output +
                                     ".editingSites.alu.vcf")
        aluVariants.printGeneList(self.genome,
                                  self.rnaEdit.params.output +
                                  ".editingSites.alu.gvf",
                                  printSummary=True)
        aluVariants.createClusters(eps=50, minSamples=5)
        aluVariants.printClusters(self.rnaEdit.params.output +
                                  ".editingIslands.bed")

        #combine alu and non Alu sites
        variants = aluVariants + nonAluVariants
        variants.deleteNonEditingBases()

        #print Final tables
        '''Read Genome'''

        variants.annotateVariantDict(self.genome)

        variants.printVariantDict(self.rnaEdit.params.output +
                                  ".editingSites.vcf")
        variants.printGeneList(self.genome,
                               self.rnaEdit.params.output +
                               ".editingSites.gvf",
                               printSummary=True)
        variants.createClusters(eps=self.rnaEdit.params.eps,
                                minSamples=self.rnaEdit.params.minPts)
        variants.printClusters(self.rnaEdit.params.output +
                               ".editingIslands.bed")

        return 1
コード例 #50
0
    def printGeneList(self, genome, outfile, printSummary=True):
        '''
        print List of genes with all the variants
        Gene-Variation-File
        "Gene_ID","gene_Name","SEGMENT","#CHROM","GENE_START","GENE_STOP","VAR_POS","REF","ALT","QUAL","BaseCounts(A,C,G,T)"

        Gene Summary File
        "Gene_ID",Gene_Name,#3'UTR,#5'UTR,#EXON,#INTRON,#TOTAL
        :param genome:  object of class Genome
        :param outfile: file or filename to write the gene list to
        :param printSummary: boolean whether to print the summary file
        '''

        sumDict = {}

        if type(genome) != Genome:
            raise AttributeError(
                "Type of genome is %s, but has to be an object of Genome" %
                type(genome))

        if type(outfile) == str:
            try:
                outfile = open(outfile, "w")

            except IOError:
                Helper.warning("Could not open %s to write Variant" % outfile,
                               self.logFile, self.textField)
        if type(outfile) != file:
            raise AttributeError(
                "Invalid outfile type in 'printVariantDict' (need string or file, %s found)"
                % type(outfile))

        startTime = Helper.getTime()
        Helper.info(
            "[%s] Print Genes and Variants to %s" %
            (startTime.strftime("%c"), outfile.name), self.logFile,
            self.textField)

        sumFile = open(outfile.name[:outfile.name.rfind(".")] + ".summary",
                       "w")

        outfile.write("\t".join([
            "#Gene_ID", "Name", "SEGMENT", "#CHROM", "GENE_START", "GENE_STOP",
            "VAR_ID", "VAR_POS", "REF", "ALT", "QUAL", "#A", "#C", "#G", "#T",
            "Reads_Total", "Edited_Reads", "Editing_Ratio", "\n"
        ]))

        for v in self.variantDict.values():
            anno = v.attributes["GI"]
            for a in anno:
                gene, segments = a
                totalReads = str(int(sum(map(int,
                                             v.attributes["BaseCounts"]))))
                if v.ref == "A" and v.alt == "G":
                    editedReads = str(v.attributes["BaseCounts"][2])
                    ratio = str(
                        round(float(editedReads) / float(totalReads), 2))
                elif (v.ref == "T" and v.alt == "C"):
                    editedReads = str(v.attributes["BaseCounts"][1])
                    ratio = str(
                        round(float(editedReads) / float(totalReads), 2))
                else:
                    editedReads = "0"
                    ratio = "0"

                if gene == "-":
                    out = [
                        "-", "-", ",".join(segments), v.chromosome, "-", "-",
                        v.id,
                        str(v.position), v.ref, v.alt,
                        str(v.qual), "\t".join(v.attributes["BaseCounts"]),
                        totalReads, editedReads, ratio, "\n"
                    ]
                    outfile.write("\t".join(out))
                else:
                    out = [
                        gene.geneId, gene.names[0], ",".join(segments),
                        v.chromosome,
                        str(gene.start),
                        str(gene.end), v.id,
                        str(v.position), v.ref, v.alt,
                        str(v.qual), "\t".join(v.attributes["BaseCounts"]),
                        totalReads, editedReads, ratio, "\n"
                    ]
                    outfile.write("\t".join(out))

                #count variations per gene
                if gene not in sumDict:
                    sumDict[gene] = [0, 0, 0, 0, 0]

                for seg in segments:
                    if seg == "3'UTR":
                        sumDict[gene][0] += 1
                    elif seg == "5'UTR":
                        sumDict[gene][1] += 1
                    elif seg in ("coding-exon", "noncoding-exon"):
                        sumDict[gene][2] += 1
                    elif seg == "intron":
                        sumDict[gene][3] += 1
                    sumDict[gene][4] += 1

        #print number of variants per gene
        if printSummary:

            sumDictGeneIds = set()
            sumFile.write("\t".join([
                "#Gene_ID", "Name", "#3'UTR", "#5'UTR", "#EXON", "INTRON",
                "#TOTAL", "\n"
            ]))
            for gene in sumDict.keys():
                numbers = map(str, sumDict[gene])
                if gene == "-":
                    sumFile.write(
                        "\t".join(["intergenic", "-"] +
                                  ["-", "-", "-", "-", numbers[4]] + ["\n"]))
                else:
                    sumFile.write("\t".join([gene.geneId, gene.names[0]] +
                                            numbers + ["\n"]))
                    sumDictGeneIds.add(gene.geneId)
            #print non effected Genes
            #this was added to have the whole set of genes in the summary file
            #so that it is easier to compare results in Excel
            genesByGeneId = genome.getGenesByGeneID()
            a = set(genesByGeneId.keys())
            b = sumDictGeneIds
            nonEffectedGenes = a - b
            for geneId in nonEffectedGenes:
                gene = genesByGeneId[geneId]
                sumFile.write("\t".join([gene.geneId, gene.names[0]] + [
                    "0",
                    "0",
                    "0",
                    "0",
                    "0",
                ] + ["\n"]))

            ################################################################
            ############    Draw Barplots with high edited Genes ###########
            ################################################################
            '''
コード例 #51
0
ファイル: main.py プロジェクト: Uppicki/pythonTasks
import sys
from PyQt5.QtWidgets import QApplication
from MyWindow import MyWindow
from Task3Logic import Task3Logic
from Task4Logic import Task4Logic
from Helper import Helper


def task3(h):
    task3 = Task3Logic()
    lines = h.reader3task('task3.input1.txt')
    h.writer3task('task3.output1.txt', task3.logic(lines))
    arr = h.reader3task('task3.output1.txt')
    print(arr)


def task4(h):
    task4 = Task4Logic()
    lines = h.reader4task('task4.input1.txt')
    h.writer4task('task4.output1.txt', task4.logic(lines))
    arr = h.reader4task('task4.output1.txt')
    print(arr)


h = Helper()
task3(h)
task4(h)


コード例 #52
0
    def startAnalysis(self):
        recaledBamFile = self.rnaEdit.params.output + ".noDup.realigned.recalibrated.bam"
        if os.path.isfile(recaledBamFile):
            Helper.info(
                "* * * [Skipping] Mapping result File already exists * * *",
                self.rnaEdit.logFile, self.rnaEdit.textField)
            self.rnaEdit.logFile.flush()
            return recaledBamFile

        if self.rnaEdit.params.paired == True:  #For paired end sequencing
            #Align first Fastq Reads to the Genome
            saiFile1 = self.rnaEdit.params.output + "_1.sai"
            cmd = [
                self.rnaEdit.params.sourceDir + "bwa", "aln", "-t",
                self.rnaEdit.params.threads, "-n", self.rnaEdit.params.maxDiff,
                "-k", self.rnaEdit.params.seedDiff,
                self.rnaEdit.params.refGenome, self.fastqFile1
            ]
            Helper.proceedCommand("Align first Reads with BWA", cmd,
                                  self.fastqFile1, saiFile1, self.rnaEdit)

            #Align second Fastq Reads to the Genome
            saiFile2 = self.rnaEdit.params.output + "_2.sai"
            cmd = [
                self.rnaEdit.params.sourceDir + "bwa", "aln", "-t",
                self.rnaEdit.params.threads, "-n", self.rnaEdit.params.maxDiff,
                "-k", self.rnaEdit.params.seedDiff,
                self.rnaEdit.params.refGenome, self.fastqFile2
            ]
            Helper.proceedCommand("Align second Reads with BWA", cmd,
                                  self.fastqFile2, saiFile2, self.rnaEdit)

            #convert sai to sam
            samFile = self.rnaEdit.params.output + ".sam"
            cmd = [
                self.rnaEdit.params.sourceDir + "bwa", "sampe", "-r",
                "@RG\tID:bwa\tSM:A\tPL:ILLUMINA\tPU:HiSEQ2000",
                self.rnaEdit.params.refGenome, saiFile1, saiFile2,
                self.fastqFile1, self.fastqFile2
            ]
            Helper.proceedCommand("convert sai to sam", cmd, saiFile1, samFile,
                                  self.rnaEdit)
        elif self.rnaEdit.params.paired == False:  #For single end sequencing
            #Align Fastq Reads to the Genome
            saiFile = self.rnaEdit.params.output + ".sai"
            cmd = [
                self.rnaEdit.params.sourceDir + "bwa", "aln", "-t",
                self.rnaEdit.params.threads, "-n", self.rnaEdit.params.maxDiff,
                "-k", self.rnaEdit.params.seedDiff,
                self.rnaEdit.params.refGenome, self.fastqFile
            ]
            Helper.proceedCommand("Align Reads with BWA", cmd, self.fastqFile,
                                  saiFile, self.rnaEdit)

            #convert sai to sam
            samFile = self.rnaEdit.params.output + ".sam"

            cmd = [
                self.rnaEdit.params.sourceDir + "bwa", "samse", "-r",
                "@RG\tID:bwa\tSM:A\tPL:ILLUMINA\tPU:HiSEQ2000",
                self.rnaEdit.params.refGenome, saiFile, self.fastqFile
            ]
            #cmd = [self.rnaEdit.params.sourceDir + "bwa", "samse", self.rnaEdit.params.refGenome, saiFile, self.fastqFile]
            Helper.proceedCommand("convert sai to sam", cmd, saiFile, samFile,
                                  self.rnaEdit)

        #convert sam to bam
        unsortedBamFile = self.rnaEdit.params.output + ".unsorted.bam"
        bamFile = self.rnaEdit.params.output + ".bam"
        """
        cmd=["java", "-Xmx8G", "-jar", self.rnaEdit.params.sourceDir + "picard-tools/SortSam.jar", "INPUT=" + samFile, "OUTPUT=" + bamFile, "SO=coordinate", "VALIDATION_STRINGENCY=LENIENT", "CREATE_INDEX=true"]
        Helper.proceedCommand("convert sam to bam", cmd, samFile, bamFile, self.rnaEdit)
        """

        #Sort and Index Bam File
        #Helper.status("Sort Bam", self.rnaEdit.logFile,self.rnaEdit.textField)
        '''pysamSamFile = pysam.Samfile(samFile,'r')
        pysamBamFile = pysam.Samfile(unsortedBamFile,'wb', template=pysamSamFile)
        
        for read in pysamSamFile.fetch():
             pysamBamFile.write(read)'''

        #pysam.sort(samFile,"-o", bamFile)
        cmd = [
            self.rnaEdit.params.sourceDir + "samtools", "sort", samFile, "-o",
            bamFile
        ]
        Helper.proceedCommand("Sort Bam File", cmd, samFile, bamFile,
                              self.rnaEdit)

        #Helper.status("index Bam", self.rnaEdit.logFile,self.rnaEdit.textField)
        #pysam.index(bamFile)
        cmd = [self.rnaEdit.params.sourceDir + "samtools", "index", bamFile]
        Helper.proceedCommand("Index Bam File", cmd, samFile, bamFile + ".bai",
                              self.rnaEdit)

        #mark PCR duplicates
        #Helper.status("Remove Duplicates", self.rnaEdit.logFile,self.rnaEdit.textField)
        markedFile = self.rnaEdit.params.output + ".noDup.bam"
        cmd = [
            "java", "-Xmx16G", "-jar",
            self.rnaEdit.params.sourceDir + "picard-tools/MarkDuplicates.jar",
            "INPUT=" + bamFile, "OUTPUT=" + markedFile,
            "METRICS_FILE=" + self.rnaEdit.params.output + ".pcr.metrics",
            "VALIDATION_STRINGENCY=LENIENT", "CREATE_INDEX=true"
        ]
        Helper.proceedCommand("Remove PCR duplicates", cmd, bamFile,
                              markedFile, self.rnaEdit)
        """if self.rnaEdit.params.paired == False:
            pysam.rmdup("-s",bamFile,markedFile)
        else:
            pysam.rmdup(bamFile,markedFile)

        #pysam.rmdup(bamFile,markedFile)
        if self.rnaEdit.params.paired == False:
            cmd = [self.rnaEdit.params.sourceDir + "samtools", "rmdup", "-s", bamFile, markedFile]
        else:
            cmd = [self.rnaEdit.params.sourceDir + "samtools", "rmdup", bamFile, markedFile]
        Helper.proceedCommand("Index Bam File", cmd, bamFile, markedFile, self.rnaEdit)
        
        
        Helper.status("index Bam", self.rnaEdit.logFile,self.rnaEdit.textField)
        pysam.index(markedFile)
        
        cmd = [self.rnaEdit.params.sourceDir + "samtools", "index", bamFile]
        Helper.proceedCommand("Index Bam File", cmd, bamFile, markedFile+".bai", self.rnaEdit)
        #return bamFile"""

        #run Alignement with tophat
        """
        bamFile=self.rnaEdit.params.output+"/accepted_hits.bam"
        cmd=[self.rnaEdit.params.sourceDir + "tophat/tophat2", "--no-coverage-search","--keep-fasta-order", "-p", "12", "--rg-id", "A","--rg-sample","A","--rg-library","illumina","--rg-platform-unit","HiSeq", "-o", self.rnaEdit.params.output, self.rnaEdit.params.refGenome, self.fastqFile ]
        print cmd
        Helper.proceedCommand("Map reads with tophat", cmd, self.rnaEdit.params.fastqFile, bamFile, self.rnaEdit.)
        """

        #sort bam
        #sortBamFile=self.rnaEdit.params.output+".bam"
        #cmd=["java", "-Xmx4G", "-jar", self.rnaEdit.params.sourceDir + "picard-tools/SortSam.jar", "INPUT=" + bamFile, "OUTPUT=" + sortBamFile, "SO=coordinate", "VALIDATION_STRINGENCY=LENIENT", "CREATE_INDEX=true"]
        #Helper.proceedCommand("sort bam", cmd, bamFile, sortBamFile, self.rnaEdit)

        #Add read group ONLY NEEDED WHEN MAPPED WITH TOPHAT
        #rgFile=self.rnaEdit.params.output+".bam"
        #cmd=["java", "-Xmx4G", "-jar", self.rnaEdit.params.sourceDir + "picard-tools/AddOrReplaceReadGroups.jar", "INPUT=" + bamFile, "OUTPUT=" + rgFile, "SO=coordinate", "VALIDATION_STRINGENCY=LENIENT", "CREATE_INDEX=true", "ID=A", "LB=A", "SM=A", "PL=illumina", "PU=HiSeq2000", "SM=A"]
        #Helper.proceedCommand("Add read Groups", cmd, bamFile, rgFile, self.rnaEdit)

        #Identify Target Regions for realignment
        intervalFile = self.rnaEdit.params.output + ".indels.intervals"
        cmd = [
            "java", "-Xmx16G", "-jar",
            self.rnaEdit.params.sourceDir + "GATK/GenomeAnalysisTK.jar", "-nt",
            self.rnaEdit.params.threads, "-T", "RealignerTargetCreator", "-R",
            self.rnaEdit.params.refGenome, "-I", markedFile, "-o",
            intervalFile, "-l", "ERROR"
        ]
        Helper.proceedCommand("Identify Target Regions for realignment", cmd,
                              bamFile, intervalFile, self.rnaEdit)

        #Proceed Realignment
        realignedFile = self.rnaEdit.params.output + ".noDup.realigned.bam"
        cmd = [
            "java", "-Xmx16G", "-jar",
            self.rnaEdit.params.sourceDir + "GATK/GenomeAnalysisTK.jar", "-T",
            "IndelRealigner", "-R", self.rnaEdit.params.refGenome, "-I",
            markedFile, "-l", "ERROR", "-targetIntervals", intervalFile, "-o",
            realignedFile
        ]
        Helper.proceedCommand("Proceed Realignment", cmd, intervalFile,
                              realignedFile, self.rnaEdit)
        """cmd=["java","-Xmx16G","-jar",self.rnaEdit.params.sourceDir + "picard-tools/MarkDuplicates.jar","INPUT=" + realignedFile, "OUTPUT=" + markedFile, "METRICS_FILE="+self.rnaEdit.params.output+".pcr.metrics", "VALIDATION_STRINGENCY=LENIENT", "CREATE_INDEX=true"]
        Helper.proceedCommand("mark PCR duplicates", cmd, realignedFile, markedFile, self.rnaEdit)
        """
        #Find Quality Score recalibration spots
        recalFile = self.rnaEdit.params.output + ".recalSpots.grp"
        cmd = [
            "java", "-Xmx16G", "-jar",
            self.rnaEdit.params.sourceDir + "GATK/GenomeAnalysisTK.jar", "-T",
            "BaseRecalibrator", "-l", "ERROR", "-R",
            self.rnaEdit.params.refGenome, "-knownSites",
            self.rnaEdit.params.dbsnp, "-I", realignedFile, "-cov",
            "CycleCovariate", "-cov", "ContextCovariate", "-o", recalFile
        ]
        Helper.proceedCommand("Find Quality Score recalibration spots", cmd,
                              realignedFile, recalFile, self.rnaEdit)

        #proceed Quality Score recalibration
        cmd = [
            "java", "-Xmx16G", "-jar",
            self.rnaEdit.params.sourceDir + "GATK/GenomeAnalysisTK.jar", "-T",
            "PrintReads", "-l", "ERROR", "-R", self.rnaEdit.params.refGenome,
            "-I", realignedFile, "-BQSR", recalFile, "-o", recaledBamFile
        ]
        Helper.proceedCommand("Proceed Quality Score recalibration", cmd,
                              recalFile, recaledBamFile, self.rnaEdit)

        return recaledBamFile
コード例 #53
0
ファイル: CallEditingSites.py プロジェクト: aaiezza/RNAEditor
    def printAttributes(self):

        Helper.info("*** CALL VARIANTS WITH FOLLOWING ATTRIBUTES ***",
                    self.rnaEdit.logFile, self.rnaEdit.textField)
        Helper.info("\t Bam-File: " + self.bamFile, self.rnaEdit.logFile,
                    self.rnaEdit.textField)
        Helper.info("\t outfilePrefix:" + self.rnaEdit.params.output,
                    self.rnaEdit.logFile, self.rnaEdit.textField)
        Helper.info("\t refGenome:" + self.rnaEdit.params.refGenome,
                    self.rnaEdit.logFile, self.rnaEdit.textField)
        Helper.info("\t dbsnp:" + self.rnaEdit.params.dbsnp,
                    self.rnaEdit.logFile, self.rnaEdit.textField)
        Helper.info("\t HapMap:" + self.rnaEdit.params.hapmap,
                    self.rnaEdit.logFile, self.rnaEdit.textField)
        Helper.info("\t 1000G Omni:" + self.rnaEdit.params.omni,
                    self.rnaEdit.logFile, self.rnaEdit.textField)
        Helper.info("\t Alu-Regions:" + self.rnaEdit.params.aluRegions,
                    self.rnaEdit.logFile, self.rnaEdit.textField)

        Helper.info("\t sourceDir:" + self.rnaEdit.params.sourceDir,
                    self.rnaEdit.logFile, self.rnaEdit.textField)
        Helper.info("\t threads:" + self.rnaEdit.params.threads,
                    self.rnaEdit.logFile, self.rnaEdit.textField)
        Helper.info("\t StandCall:" + self.rnaEdit.params.standCall,
                    self.rnaEdit.logFile, self.rnaEdit.textField)
        Helper.info("\t standEmit:" + self.rnaEdit.params.standEmit,
                    self.rnaEdit.logFile, self.rnaEdit.textField)
        Helper.info("\t keepTemp:" + str(self.rnaEdit.params.keepTemp),
                    self.rnaEdit.logFile, self.rnaEdit.textField)

        Helper.info(
            "\t intronDistance:" + str(self.rnaEdit.params.intronDistance),
            self.rnaEdit.logFile, self.rnaEdit.textField)
        Helper.info("\t minPts:" + str(self.rnaEdit.params.minPts),
                    self.rnaEdit.logFile, self.rnaEdit.textField)
        Helper.info("\t eps:" + str(self.rnaEdit.params.eps),
                    self.rnaEdit.logFile, self.rnaEdit.textField)

        Helper.info("\t overwrite:" + str(self.rnaEdit.params.overwrite),
                    self.rnaEdit.logFile, self.rnaEdit.textField)
コード例 #54
0
ファイル: Nginx.py プロジェクト: Sephirothus/unfolding
 def check(self):
     return (Helper()).checkVersion(self.serviceName, '-v')
コード例 #55
0
ファイル: CallEditingSites.py プロジェクト: aaiezza/RNAEditor
    def blatSearch(self, variants, outFile, minBaseQual, minMissmatch):
        startTime = Helper.getTime()
        Helper.info(
            " [%s] Search non uniquely mapped reads" %
            (startTime.strftime("%c")), self.rnaEdit.logFile,
            self.rnaEdit.textField)

        bamFile = Samfile(self.bamFile, "rb")
        #create Fasta file for blat to remap the variant overlapping reads
        tempFasta = outFile + "_tmp.fa"
        if not os.path.isfile(tempFasta) or not os.path.getsize(
                tempFasta
        ) > 0:  #check if tempFasta exists and is not empty; if it exists it will not be created again
            tempFastaFile = open(tempFasta, "w+")
            mmNumberTotal = len(variants.variantDict)

            #############################################
            #########    CREATE FASTA FILE        #######
            #############################################
            Helper.info(
                " [%s] Create fasta file for blat " %
                (startTime.strftime("%c")), self.rnaEdit.logFile,
                self.rnaEdit.textField)
            counter = 1

            if len(variants.variantDict.keys()) == 0:
                Helper.error("No Variants left", self.rnaEdit.logFile,
                             self.rnaEdit.textField)

            for varKey in variants.variantDict.keys():
                variant = variants.variantDict[varKey]
                varPos = variant.position - 1
                iter = bamFile.pileup(variant.chromosome, variant.position - 1,
                                      variant.position)
                alignements = []
                for x in iter:
                    if x.pos == varPos:
                        #loop over reads of that position
                        for pileupread in x.pileups:
                            if not pileupread.is_del and not pileupread.is_refskip:
                                if pileupread.alignment.query_sequence[
                                        pileupread.
                                        query_position] == variant.alt and pileupread.alignment.query_qualities[
                                            pileupread.
                                            query_position] >= minBaseQual:
                                    #if pileupread.alignment.query_sequence[pileupread.query_position] == variant.alt:
                                    alignements.append(
                                        pileupread.alignment.seq)

                if len(alignements) >= minMissmatch:
                    missmatchReadCount = 0
                    for sequence in alignements:
                        tempFastaFile.write("> " + variant.chromosome + "-" +
                                            str(variant.position) + "-" +
                                            variant.ref + "-" + variant.alt +
                                            "-" + str(missmatchReadCount) +
                                            "\n" + sequence + "\n")
                        missmatchReadCount += 1

                counter += 1
                if counter % 1000 == 0:
                    sys.stdout.write("\r" + str(counter) + " of " +
                                     str(mmNumberTotal) + " variants done")
                    Helper.info(
                        str(counter) + " of " + str(mmNumberTotal) +
                        " variants done", self.rnaEdit.logFile,
                        self.rnaEdit.textField)
                    sys.stdout.flush()

            Helper.info("\n created fasta file " + tempFasta,
                        self.rnaEdit.logFile, self.rnaEdit.textField)
            Helper.printTimeDiff(startTime, self.rnaEdit.logFile,
                                 self.rnaEdit.textField)
            tempFastaFile.close()

        #############################
        #####   do blat search  #####
        #############################
        pslFile = outFile + ".psl"
        if not os.path.isfile(pslFile) or not os.path.getsize(pslFile) > 0:
            cmd = [
                self.rnaEdit.params.sourceDir + "blat", "-stepSize=5",
                "-repMatch=2253", "-minScore=20", "-minIdentity=0", "-noHead",
                self.rnaEdit.params.refGenome, tempFasta, pslFile
            ]
            #print cmd
            Helper.proceedCommand("do blat search for unique reads", cmd,
                                  tempFasta, "None", self.rnaEdit)
        Helper.info(" [%s] Blat finished" % (startTime.strftime("%c")),
                    self.rnaEdit.logFile, self.rnaEdit.textField)
        Helper.info(
            " [%s] Parse Blat output to look for non uniquely mapped reads" %
            (startTime.strftime("%c")), self.rnaEdit.logFile,
            self.rnaEdit.textField)

        if not os.path.isfile(outFile):
            #open psl file
            pslFile = open(pslFile, "r")
            blatDict = {}

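            #PSL columns used below (0-based): 0 = matches, 9 = query name
            #(chr-pos-ref-alt-readNumber, as written to the fasta file above),
            #13 = target name, 17 = block count, 18 = block sizes, 20 = target starts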
            for line in pslFile:  #summarize the blat hits
                pslFields = line.split()
                chr, pos, ref, alt, mmReadCount = pslFields[9].split("-")
                varTuple = (chr, int(pos), ref, alt)
                try:
                    blatScore = [
                        pslFields[0], pslFields[13], pslFields[17],
                        pslFields[18], pslFields[20]
                    ]  # #of Matches, targetName, blockCount, blockSize, targetStarts
                except IndexError:
                    Helper.warning("Not enough values in '%s' (skipping)" % line,
                                   self.rnaEdit.logFile,
                                   self.rnaEdit.textField)
                    continue
                if varTuple in blatDict:
                    blatDict[varTuple] = blatDict[varTuple] + [blatScore]
                else:
                    blatDict[varTuple] = [blatScore]

            siteDict = {}
            discardDict = {}
            Helper.info(
                " [%s] Analyse Blat hits (Slow)" % (startTime.strftime("%c")),
                self.rnaEdit.logFile, self.rnaEdit.textField)

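            #for every mismatch site, find the best-scoring blat hit among its
            #supporting reads and keep the site only if that hit lies on the expected
            #chromosome, overlaps the variant position, and the second-best hit scores
            #below 95 percent of the best one; e.g. a best score of 60 with a
            #second-best of 50 passes, because 50 < 0.95 * 60 = 57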
            #loop over all blat hits of the mismatching reads to count their alignments
            for varTuple in blatDict.keys():
                keepSNP = False
                chr, pos, ref, alt = varTuple
                pslLine = blatDict[varTuple]
                largestScore = 0
                largestScoreLine = pslLine[0]
                scoreArray = []

                #look for largest blatScore and save the largest line too
                for blatHit in pslLine:
                    lineScore = int(blatHit[0])
                    scoreArray.append(lineScore)
                    if lineScore > largestScore:
                        largestScore = lineScore
                        largestScoreLine = blatHit

                scoreArray.sort(reverse=True)
                if len(scoreArray) < 2:  #pad with a 0 score when only one blat hit exists
                    scoreArray.append(0)
                #same chromosome and second-best hit below 95 percent of the best hit?
                if chr == largestScoreLine[1] and scoreArray[1] < scoreArray[0] * 0.95:
                    blockCount = int(largestScoreLine[2])
                    blockSizes = largestScoreLine[3].split(",")[:-1]
                    blockStarts = largestScoreLine[4].split(",")[:-1]
                    for i in range(blockCount):
                        startPos = int(blockStarts[i]) + 1
                        endPos = startPos + int(blockSizes[i])
                        if startPos <= pos < endPos:  #the aligned block overlaps the mismatch position
                            keepSNP = True

                if keepSNP:
                    if varTuple in siteDict:
                        siteDict[varTuple] += 1
                    else:
                        siteDict[varTuple] = 1
                else:  #the read does not pass the blat criteria
                    if varTuple in discardDict:
                        discardDict[varTuple] += 1
                    else:
                        discardDict[varTuple] = 1
            pslFile.close()

            ##############################################################################
            #####        loop through variants and delete invalid variants          ######
            ##############################################################################
            Helper.info(
                " [%s] Deleting invalid variants" % (startTime.strftime("%c")),
                self.rnaEdit.logFile, self.rnaEdit.textField)

            mmNumberTotal = 0
            mmNumberTooSmall = 0
            mmReadsSmallerDiscardReads = 0
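            #a variant is removed when the number of reads passing the blat check is
            #neither above minMissmatch nor above the number of discarded reads; the
            #dict keys are copied into a list so entries can be deleted while looping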
            for key in list(variants.variantDict.keys()):
                numberBlatReads = 0
                numberDiscardReads = 0
                if key in siteDict:
                    numberBlatReads = siteDict[key]
                if key in discardDict:
                    numberDiscardReads = discardDict[key]

                if numberBlatReads <= minMissmatch and numberBlatReads <= numberDiscardReads:
                    del variants.variantDict[key]

                #count statistics
                if numberBlatReads < minMissmatch:
                    mmNumberTooSmall += 1
                elif numberBlatReads < numberDiscardReads:  #check if more reads fit the blat criteria than not
                    mmReadsSmallerDiscardReads += 1
                mmNumberTotal += 1

            if not self.rnaEdit.params.keepTemp:
                os.remove(tempFasta)
                os.remove(pslFile.name)

            #output statistics
            mmPassedNumber = mmNumberTotal - (mmNumberTooSmall +
                                              mmReadsSmallerDiscardReads)

            Helper.info(
                "\t\t %d out of %d passed blat criteria" %
                (mmPassedNumber, mmNumberTotal), self.rnaEdit.logFile,
                self.rnaEdit.textField)
            Helper.info(
                "\t\t %d mismatches had fewer than %d mismatching reads." %
                (mmNumberTooSmall, minMissmatch), self.rnaEdit.logFile,
                self.rnaEdit.textField)
            Helper.info(
                "\t\t %d mismatches had more misaligned reads than correctly aligned ones."
                % (mmReadsSmallerDiscardReads), self.rnaEdit.logFile,
                self.rnaEdit.textField)

        Helper.printTimeDiff(startTime, self.rnaEdit.logFile,
                             self.rnaEdit.textField)
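
#---------------------------------------------------------------------------
# Illustrative sketch (not part of RNAEditor): the uniqueness test applied
# above, reduced to a standalone function that takes all blat hits of one
# query. The function name and the call below are made up for demonstration;
# field indices follow the standard PSL layout.
#---------------------------------------------------------------------------
def read_maps_uniquely(psl_rows, expected_chrom, variant_pos):
    """psl_rows: list of split PSL lines; returns True if the query maps uniquely."""
    scores = sorted((int(row[0]) for row in psl_rows), reverse=True)
    best_row = max(psl_rows, key=lambda row: int(row[0]))
    second_best = scores[1] if len(scores) > 1 else 0
    #best hit must be on the expected chromosome and clearly better than the rest
    if best_row[13] != expected_chrom or second_best >= 0.95 * scores[0]:
        return False
    block_sizes = [int(s) for s in best_row[18].rstrip(",").split(",")]
    block_starts = [int(s) for s in best_row[20].rstrip(",").split(",")]
    #at least one aligned block has to overlap the variant position (1-based)
    return any(start + 1 <= variant_pos < start + 1 + size
               for start, size in zip(block_starts, block_sizes))
# e.g. read_maps_uniquely(hits_for_one_read, "chr1", 4536723) -> True or False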
コード例 #56
0
ファイル: Nginx.py プロジェクト: Sephirothus/unfolding
 def restart(self):
     (Helper()).execute('sudo /etc/init.d/' + self.serviceName + ' restart')
コード例 #57
0
ファイル: Nginx.py プロジェクト: Sephirothus/unfolding
 def enableSite(self, siteName):
     (Helper()).execute('sudo ln -s ' + self.pathAvailable + '/' +
                        siteName + ' ' + self.pathEnabled)
コード例 #58
0
ファイル: Nginx.py プロジェクト: Sephirothus/unfolding
 def disableSite(self, siteName):
     (Helper()).rm(self.pathEnabled + '/' + siteName)
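 # Debian-style nginx layout assumed by enableSite/disableSite above: site configs
 # live in a "sites-available" directory (self.pathAvailable) and are activated by
 # symlinking them into "sites-enabled" (self.pathEnabled); removing the symlink
 # deactivates the site without deleting its configuration.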
コード例 #59
0
import inspect
import sys

# Helper, ConfigPaths and cl (the server definitions) are project modules that
# the original file imports elsewhere; they are assumed to be available here.

class Config:

	data = {}
	helper = False
	withDependencies = False

	def __init__(self, arg):
		self.withDependencies = arg
		self.helper = Helper()

	def getConf(self):
		return self.data

	# =================== Checking ====================== #

	def createQueue(self, conf):
		queue = []
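		# conf maps folders to class names; a value may be a plain name, a list of
		# names, or a dict of name -> params, and every entry goes through
		# checkDependencies so that required classes end up in the queue as well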
		try:
			for key, vals in conf.iteritems():
				if (type(vals) is list):
					for val in vals:
						self.checkDependencies(key, val, conf, queue)
				elif (type(vals) is dict):
					for name, params in vals.iteritems():
						self.checkDependencies(key, name, conf, queue, params)
				else:
					self.checkDependencies(key, vals, conf, queue)

			return self.sortQueue(queue)
		except:
			print sys.exc_info()

	def checkDependencies(self, folder, className, conf, queue, params=False):
		curClass = self.helper.getClass(folder + '.' + self.helper.ucfirst(className))()
		if hasattr(curClass, 'dependencies') and self.withDependencies == '1':
			for val in curClass.dependencies:
				curVal = val.split('.')
				if curVal[0] in conf:
					if curVal[1] == conf[curVal[0]] or (hasattr(conf[curVal[0]], 'keys') and curVal[1] in conf[curVal[0]].keys()):
						continue

				self.checkDependencies(curVal[0], curVal[1], conf, queue)

		if params:
			curClass.attrs = {}
			for key, val in params.iteritems():
				curClass.attrs[key] = val
		self.helper.listAdd(curClass, queue)

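	# sortQueue moves classes referenced by another item's sortOrder or
	# dependencies to the front of the new queue, then listMerge appends the rest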
	def sortQueue(self, queue):
		newQueue = []
		for val in queue:
			if hasattr(val, 'sortOrder'):
				for sortEl in val.sortOrder:
					self.helper.listFindAndAdd(sortEl, queue, newQueue)
			if hasattr(val, 'dependencies'):
				for sortEl in val.dependencies:
					self.helper.listFindAndAdd(sortEl, queue, newQueue)
		self.helper.listMerge(queue, newQueue)
		return newQueue

	# =================== Creation ====================== #

	def createConf(self):
		self.data['dist'] = self.setDist()
		self.data['language'] = self.setLang()
		self.data['server'] = self.setServer()
		# self.data['db'] = self.setDb()
		return self.getConf()

	def choice(self, arr, question, isRequired=False):
		string = '===== '+question+' =====\n'
		num = 0
		for key, val in arr.iteritems():
			string += str(num)+'. '+key+'\n'
			num += 1

		if (isRequired is False): string += str(num)+'. Nothing\n'
		curChoice = raw_input(string+'Your choice? ')
		if (curChoice == str(num) and isRequired is False): print ''

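		# map the numeric answer back to the corresponding dict key; any input that
		# cannot be resolved (including the extra 'Nothing' entry, which has no key)
		# falls into the except branch and re-asks by re-calling the caller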
		try:
			return arr.keys()[int(curChoice)]
		except:
			print 'not a correct number'
			func = inspect.getouterframes(inspect.currentframe())[1][3]
			getattr(self, func)()

	def setDist(self):
		grep = self.helper.execute("cat /etc/lsb-release")
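		# /etc/lsb-release starts with a line like DISTRIB_ID=Ubuntu, so the first
		# line's value (lower-cased) gives the distribution name; self.dist is
		# assumed to be a mapping of supported distributions defined elsewhere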
		dist = grep.split('\n')[0].split('=')[1].lower()
		if (dist in self.dist): 
			return self.dist[dist]
		else:
			dist = raw_input('type your linux distribution name? ').lower()
			if (dist in self.dist): 
				return dist
			else:
				sys.exit('Sorry, this distribution is not supported :(')

	def setLang(self):
		choice = self.choice(ConfigPaths.languages, 'Select language? ')
		return ConfigPaths.languages[choice]

	def setServer(self):
		choice = self.choice(cl.servers, 'Select http server? ')
		return cl.servers[choice]

	def setVcs(self):
		return ''

	def setDb(self):
		choice = self.choice(self.languages, 'Select database? ')
		return self.languages[choice]
		
コード例 #60
0
 def __init__(self):
     self.helper = Helper()
     self.sid_list = []
     self.core_list = []