Example #1
    def presence(self):
        # self.peak_hour1 = Label(self.top, text='Peak hour today:', font=("Bookman", 20),
        #                         bg='#3c8081')
        # self.peak_hour2 = Label(self.top, text=str(ApiProcess.get_peak()) + ':00', font=("Bookman", 25),
        #                         fg='#FFA505', bg='#3c8081')
        # self.peak_hour1.pack(side=LEFT, fill=tk.Y)
        # self.peak_hour2.pack(side=LEFT)
        #
        # self.count_visitors1 = Label(self.top, text='   Count of visitors today:', font=("Bookman", 20),
        #                              bg='#3c8081')
        # self.count_visitors2 = Label(self.top, text=str(ApiProcess.get_today_visitors()), font=("Bookman", 25),
        #                              fg='#FFA505', bg='#3c8081')
        # self.count_visitors1.pack(side=LEFT, fill=tk.Y)
        # self.count_visitors2.pack(side=LEFT)
        #
        # self.count_yes_visitors1 = Label(self.top, text='   Count of visitors yesterday:', font=("Bookman", 20),
        #                                  bg='#3c8081')
        # self.count__yes_visitors2 = Label(self.top, text=str(ApiProcess.get_today_visitors()), font=("Bookman", 25),
        #                                   fg='#FFA505', bg='#3c8081')
        # self.count_yes_visitors1.pack(side=LEFT, fill=tk.Y)
        # self.count__yes_visitors2.pack(side=LEFT)

        # fig, ax = plt.subplots(figsize=(6, 3), subplot_kw=dict(aspect="equal"))
        # self.canvas_dwell = FigureCanvasTkAgg(fig, self.middle)
        # self.canvas_repeat = FigureCanvasTkAgg(fig, self.middle)
        # self.make_peak_hour()
        Analytics.connected_visitors(self)
        Analytics.repeat_visitors(self)
Example #2
def ReturnStoreInfo(store):
    try:
        headers = {
            'User-Agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
        }
        Store = {}
        url = 'https://www.walmart.com/store/{}'.format(store)
        res = requests.get(url, headers=headers, proxies=proxies)
        page = bs4.BeautifulSoup(res.text, "lxml")
        Store['Number'] = store
        Google = EditGoogleMaps(
            page.select(
                '#store-side-bar > div.StoreSideBar > div.GoogleMapsIframe > iframe'
            )[0], 400, 400)
        Store['GoogleMaps'] = Markup(Google)
        if len(page.select('.open-24-hours')) == 0:
            Store['StoreHours'] = 'Not 24 Hours'
        else:
            Store['StoreHours'] = '24 Hours'
        Store['Phone'] = MarkupIgnore(page.select('.phone')[0])
        Store['Address2'] = MarkupIgnore(page.select('.address2')[0])
        Store['Address1'] = MarkupIgnore(page.select('.address1')[0])
        Store['Name'] = MarkupIgnore(page.select('.heading-d')[0])
        Store['ItemCount'] = len(Analytics.ConvertStoreToDict(store))
        Store['Econ'] = str("{:,.2f}".format(Status(
            Store['Address2'][-5:])))[:-3]
        Store.update(Analytics.ReturnStoreInfo(store))
        return Store
    except Exception as exp:
        print(exp)
Example #3
 def sequenceDTAs(self):
     curPairedScanData = self._indexedPairData[int(self._pairedScanListbox.curselection()[0])]
     t1 = time.time()
     if curPairedScanData['heavy'] != 'N/A':
         heavySeqMap = copy.deepcopy(self._seqMap)
         heavySeqMap['Mods']['N-Term'] = self._paramsDict['Pair Configurations'][curPairedScanData['pair configuration']]['NModSymbol']
         heavySeqMap['Mods']['C-Term'] = self._paramsDict['Pair Configurations'][curPairedScanData['pair configuration']]['CModSymbol']
         sharedInfo, starts, ends, deltas, termModHash, specs, G = DNS.initializeSpectrumGraph(self._pnet, self._paramsDict, self._scanFDict[curPairedScanData['light']]['dta'], heavyPath=self._scanFDict[curPairedScanData['heavy']]['dta'], ppm=self._ppm, usePaired=True, pairConfigName=curPairedScanData['pair configuration'], verbose=False)
         precMass = sharedInfo['lightPrecMass']
     else:
         sharedInfo, starts, ends, deltas, termModHash, specs, G = DNS.initializeSpectrumGraph(self._pnet, self._paramsDict, self._scanFDict[curPairedScanData['light']]['dta'], ppm=self._ppm, verbose=False)
         precMass = sharedInfo['precMass']
     
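     # Convert the relative ppm tolerance into an absolute mass tolerance for this precursor mass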
     epsilon = self._ppm * precMass * 10 ** -6
     paths, subG = DNS.getSpectrumGraphPaths(G, deltas, specs, starts, ends, precMass - Constants.mods['H+'] - Constants.mods['H2O'], termModHash=termModHash, unknownPenalty=self._ambigpenalty, maxEdge=self._maxedge, minEdge=self._minedge, subGraphCut=self._subgraphcut, subAlpha=0.3, alpha=self._alpha, epsilon=epsilon, aas=self._aas, verbose=False)
     seqTime = time.time() - t1
     if paths:
         seqs = []
         for path in paths:
             seqs.extend([DNS.getSequenceFromNodes(subG, path[1], precMass - Constants.mods['H+'] - Constants.mods['H2O'], termModHash)])
 
         scores = list(zip(*paths)[0])
         Ord = np.argsort(-1 * np.array(scores))
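          # Ord ranks the candidate paths from highest to lowest score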
         
         ambigEdges = []
         numAmbig = 0
         for j in range(self._numseq):
             try:
                 for i in range(len(seqs[Ord[j]])):
                     if type(seqs[Ord[j]][i]) == tuple:
                         ambigEdges.extend([seqs[Ord[j]][i]])
                         numAmbig += 1
                         seqs[Ord[j]][i] = '-'
             
                 curSeq = ''.join(seqs[Ord[j]])
                 curSeq = An.preprocessSequence(curSeq, self._seqMap, ambigEdges=ambigEdges)
                 if j == 0 and curPairedScanData['heavy'] != 'N/A':
                     try:
                         curHeavySeq = An.preprocessSequence(curSeq, heavySeqMap, replaceExistingTerminalMods=True, ambigEdges=ambigEdges)
                         AAs = An.getAllAAs(curHeavySeq, ambigEdges=ambigEdges)
                         self._seqStatus.set('Paired Sequencing Successful! Heavy Sequence: %s. Time taken: %f seconds' % (curHeavySeq, seqTime))
                     except KeyError:
                         self._seqStatus.set('ERROR: Heavy Sequence %s is not a valid sequence! Time wasted: %f seconds' % (curHeavySeq, seqTime))
                 elif j == 0:
                     self._seqStatus.set('Unpaired Sequencing Successful! Time taken: %f seconds' % (seqTime))
                 
                 for labelInst in self._seqScoreData[j]['seq'].children.values():
                     labelInst.destroy()
                 self.displayConfColoredSequence(subG, self._seqScoreData[j]['seq'], paths[Ord[j]][1], curSeq, ambigEdges=ambigEdges)
                 self._seqScoreData[j]['score'].set(str(scores[Ord[j]]))
             except IndexError:
                 for labelInst in self._seqScoreData[j]['seq'].children.values():
                     labelInst.destroy()
                 self._seqScoreData[j]['score'].set('')
     else:
         self._seqStatus.set('ERROR: No Sequences Found! Time wasted: %f seconds' % seqTime)
Example #4
def validateHeavySequence(seq, heavySeqMap, ambigEdges):
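    # The sequence counts as valid if it can be remapped with the heavy-label terminal mods and
    # decomposed into known amino acids; a KeyError from either step means it is not.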
    try:
        if seq != '-':
            heavySeq = Analytics.preprocessSequence(seq, heavySeqMap, replaceExistingTerminalMods=True, ambigEdges=ambigEdges)
            AAs = Analytics.getAllAAs(heavySeq, ambigEdges=ambigEdges)
            return True
        else:
            return False
    except KeyError:
        return False
Example #5
def split(train_data, train_labels, extra_data, extra_labels):
    random.seed()
    n_labels = 10
    valid_index = []
    valid_index2 = []
    train_index = []
    train_index2 = []

    for i in np.arange(n_labels):
        # Add the first 400 indices of training samples whose first digit is i
        valid_index.extend(np.where(train_labels[:, 1] == i)[0][:400].tolist())
        # Add the remaining such indices to the training list
        train_index.extend(np.where(train_labels[:, 1] == i)[0][400:].tolist())
        # The first 200 from the extra set
        valid_index2.extend(
            np.where(extra_labels[:, 1] == i)[0][:200].tolist())
        # The rest of the extra set
        train_index2.extend(
            np.where(extra_labels[:, 1] == i)[0][200:].tolist())

    # Randomize the lists
    random.shuffle(valid_index)
    random.shuffle(train_index)
    random.shuffle(valid_index2)
    random.shuffle(train_index2)

    # Build the validation data from extra_data at the valid_index2 indices and
    # train_data at the valid_index indices; the array axes are
    # (sample, pixel row, pixel column, RGB channel).
    valid_data = np.concatenate(
        (extra_data[valid_index2, :, :, :], train_data[valid_index, :, :, :]),
        axis=0)
    # Do the same for the validation labels
    valid_labels = np.concatenate(
        (extra_labels[valid_index2, :], train_labels[valid_index, :]), axis=0)
    # Do the same thing with the training data
    train_data = np.concatenate(
        (extra_data[train_index2, :, :, :], train_data[train_index, :, :, :]),
        axis=0)
    # Do the same thing with the training labels
    train_labels = np.concatenate(
        (extra_labels[train_index2, :], train_labels[train_index, :]), axis=0)

    print("Training set created with shape")
    print(train_data.shape, train_labels.shape)
    print("Validation set created with shape")
    print(valid_data.shape, valid_labels.shape)

    Analytics.load()
    Analytics.data_set_size['train'] = train_data.shape[0]
    Analytics.data_set_size['valid'] = valid_data.shape[0]
    Analytics.save()

    return train_data, train_labels, valid_data, valid_labels
Example #6
def customer(enviroment, ressource, kundenNummer):
    """Modelliert einen Kunden in der Tierhandlung."""
    inSystem = enviroment.now
    event = ressource.getIn()
    Analytics.addWaitsAtPoint(enviroment.now, waitingCustomers())
    if waitingCustomers() > L:
        enviroment.process(counterOpener(enviroment))
    Analytics.addAverageQueueLengthAtPoint(
        enviroment.now,
        waitingCustomers() / len(ressource.waitsFor))
    yield event
    gainedCounter = ressource.getCounter(event)
    with gainedCounter.request() as req:
        yield req
        Analytics.addWaittimePerCustomer(kundenNummer,
                                         enviroment.now - inSystem)
        tiere = 1
        for i in range(4):
            if lcg.nextBool(0.5):
                tiere += 1
                #Binomially distributed increment: between +0 and +4.
        bezahlzeit = tiere * Tr + lcg.nextTransformed(inverseCDFPareto)
        print("Kunde %i wird %f Minuten für das Bezahlen brauchen" %
              (kundenNummer, bezahlzeit))
        yield enviroment.timeout(bezahlzeit)
    Analytics.addTotaltimePerCustomer(kundenNummer, enviroment.now - inSystem)
    ressource.nextActionForCounter(gainedCounter)
    return
Example #7
 def __init__(self, rs, vehics):
     self.rs = rs
     self.vehics = vehics
     self.rects = []
     self.lc = LightController(self)
     self.analytics = Analytics()
     self.learning = Learning(self)
Example #8
def parseScans(fDict, prog, seqMap, dbDict, delimiter=',', srchID = None, seqDelimLen=2):
    processedInfo = {}
    for csvfile in fDict.keys():
        MASCOTData = DataFile.getScanInfo(csvfile, dbDict[prog]['fields'] + (['SrchID'] if srchID != None else []), delimiter=delimiter)
        processedInfo[fDict[csvfile]] = An.preprocessDatabaseScanInfo(MASCOTData, seqMap[fDict[csvfile]], dbDict[prog]['fieldmap'], srchID = srchID, seqDelimLen=seqDelimLen)
    
    return processedInfo
Example #9
def parseDBScans(fDict, prog, seqMap, dbDict):
    processedInfo = {}
    for csvfile in fDict.keys():
        MASCOTData = DataFile.getScanInfo(csvfile, dbDict[prog]['fields'], delimiter=',')
        processedInfo[fDict[csvfile]] = An.preprocessDatabaseScanInfo(MASCOTData, seqMap[fDict[csvfile]], dbDict[prog]['fieldmap'])
    
    return processedInfo
Example #10
 def __init__(self):
     self.health = playerhealth
     self.money = 0
     self.gems = 0
     self.upgPathSelectLvl = 5
     self.abilities = list()
     self.wavenum = 0
     self.gameover = False
     self.towerSelected = None
     self.tbbox = None
     self.layout = None
     self.wavestart = 999
     self.next_wave = False
     self.pausetime = 0
     self.state = "Start"
     self.restart = False
     self.score = 0
     self.newMoveList = False
     self.wavetime = None
     self.wavetimeInt = None
     self.myDispatcher = EventDispatcher.EventDisp()
     self.analytics = Analytics.Analytics()
     self.store = DictStore('settings.txt')
     if self.store.exists('audio'):
         self.soundOn = self.store.get('audio')['soundOn']
         self.musicOn = self.store.get('audio')['musicOn']
     else:
         self.soundOn = True
         self.musicOn = True
Example #11
class Overwatch:

    def __init__(self, rs, vehics):
        self.rs = rs
        self.vehics = vehics
        self.rects = []
        self.lc = LightController(self)
        self.analytics = Analytics()
        self.learning = Learning(self)

        
    #removes the given vehic from the vehicles list so it will be garbage collected
    def removeVehic(self, vehic):
        self.vehics.remove(vehic)
        self.analytics.vehicPassed(vehic)


    #automatically spawns vehicles at the rate defined in the road system data file
    def autoVehicSpawn(self, frameCount, screen):
        defaultSpawnInfo = self.rs.entranceExitDefaults
        for key in self.rs.spawns.mods:
            if frameCount % self.rs.spawns.mods[key] == 0:
                rand = random.randint(1,100)
                if rand%2==0:
                    self.spawnVehic(key, 1, defaultSpawnInfo[key][0], screen)
                elif rand%2==1:
                    self.spawnVehic(key, 2, defaultSpawnInfo[key][0], screen)

    #returns the last vehicle spawned on the given road in the given lane
    def getPrevVehic(self, road, lane):
        if len(self.vehics) == 0 : return None
        limit = -1*len(self.vehics)
        i = -1
        while  i >= limit:
            if self.vehics[i].road == road and self.vehics[i].lane.id == lane : return self.vehics[i]
            i -= 1
        return None

    #spawns a vehicle at the given entrance, in the given lane, headed for the given exit, and adds it to the vehicle list
    def spawnVehic(self, entrance, lane, exit, screen):
        entrance = self.rs.features[entrance]
        road = entrance.road
        exit = self.rs.features[exit]
        frontVehic = self.getPrevVehic(road, lane)
        v = Vehicle(screen, self.rs, entrance, road, road.lanes[lane-1], frontVehic, exit, self)
        road.lanes[lane-1].vehicles.append(v)    
        self.vehics.append(v)
Example #12
def preprocess_image(digitStruct, dataset=""):
    filename = os.path.join(dataset, digitStruct['filename'])
    im = mpimg.imread(filename)

    # Analytics code
    Analytics.load()
    if im.shape[0] > Analytics.max_height[dataset]:
        Analytics.max_height[dataset] = im.shape[0]
    if im.shape[1] > Analytics.max_width[dataset]:
        Analytics.max_width[dataset] = im.shape[1]
    Analytics.save()

    t, l, w, h = find_bounding_box(digitStruct)
    t, l, w, h = scale(t, l, w, h)
    cropped = crop(im, t, l, w, h)
    resized = resize(cropped)
    gray = grayscale(resized)
    return gray
Example #13
 def listDTAs(self):
     self._dtaList = glob.glob(self._selectedDir.get() + '/*.dta')
     if not self._dtaList:
         self._selectedDir.set('No DTAs in selected directory!')
     else:
         self._scanFDict = An.getScanFDict(self._dtaList)
         self._indexedScanFList = np.zeros(len(self._scanFDict))
         for i, scanF in enumerate(sorted(self._scanFDict.keys())):
             self._scanFListbox.insert(END, str(scanF))
             self._indexedScanFList[i] = scanF
Example #14
def add_numbers():
    a = request.args.get('a', 0, type=str)
    store = request.args.get('b', 0, type=str)
    # a is the SKIN or search query
    result = Analytics.LocalPrice(store, str(a))
    if result is not None:
        a = '{} - {} In Stock'.format('${:,.2f}'.format(result[0]), result[1])
        return jsonify(result=str(a))
    else:
        return jsonify(result='Item Not Available')
Example #15
	def GrabPrice(smalllist):

		for s in smalllist:
			try:
				a = Analytics.OnlinePricingInfo(s)
				b = [s, a['ListPrice'], a['Price']]
				print(b)
				Info.append(b)
			except BaseException as exp:
				print(exp)
				pass
Example #16
def getLabels(digitStruct, folder):
    label = np.ones([6], dtype=int) * 10
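    # label[0] holds the digit count; label[1:6] hold up to five digit classes, with 10 meaning "no digit"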
    boxes = digitStruct['boxes']
    num_digits = len(boxes)
    label[0] = num_digits

    #Analytics Code
    Analytics.load()
    slot = num_digits
    if num_digits > 5:
        slot = 6
    Analytics.sequence_lengths[folder][slot] += 1
    Analytics.save()

    for i in np.arange(num_digits):
        if i < 5:
            label[i + 1] = boxes[i]['label']
            if boxes[i]['label'] == 10:
                label[i + 1] = 0
    return label
Example #17
 def displayConfColoredSequence(self, G, masterFrame, path, seq, ambigEdges=None):
     nodeGen = Constants.nodeInfoGen(seq, addTerminalNodes=False, considerTerminalMods=True, ambigEdges=ambigEdges)
     prevNode = None
     for i, node in enumerate(nodeGen):
         print node, path[i+1]
         node['prm'] = path[i+1]
         confScore = An.getAAConfidence(G, prevNode=prevNode, nextNode=node)
         hexColor = self.getHexString(np.array([1-confScore, confScore, 0]))
         # prevNode is still None for the first residue, which may carry an N-terminal mod symbol
         if prevNode == None and seq[len(node['formAA'])] in Constants.NTermMods:
             Label(masterFrame, text=node['formAA']+seq[len(node['formAA'])], fg='white', bg=hexColor).pack(side=LEFT)
         else:
             Label(masterFrame, text=node['formAA'], fg='white', bg=hexColor).pack(side=LEFT)
         prevNode = node
     
     confScore = An.getAAConfidence(G, prevNode=prevNode, nextNode=None)
     hexColor = self.getHexString(np.array([1-confScore, confScore, 0]))
     if seq[-1] in Constants.CTermMods:
         Label(masterFrame, text=node['lattAA']+seq[-1], fg='white', bg=hexColor).pack(side=LEFT)
     else:
         Label(masterFrame, text=node['lattAA'], fg='white', bg=hexColor).pack(side=LEFT)
Example #18
 def getPairs(self):
     pairs = {}
     for scanF in self._scanFDict:
         self._scanFDict[scanF]['paired scans'] = {}
     for pairConfigName in self._paramsDict['Pair Configurations']:
         pairConfig = self._paramsDict['Pair Configurations'][pairConfigName]
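          # Find light/heavy scan pairs whose precursor masses differ by the combined N-/C-terminal label mass (within the ppm tolerance)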
         pairs[pairConfigName] = An.findDeltaPairs(self._dtaList, pairConfig['NMod']+pairConfig['CMod'], ppm=self._ppm)
         for pair in pairs[pairConfigName]:
             pairData = {'light': pair[0], 'heavy': pair[1], 'pair configuration': pairConfigName, 'pair score': None, 'light precmass': self._scanFDict[pair[0]]['precMass'], 'heavy precmass': self._scanFDict[pair[1]]['precMass']}
             self._scanFDict[pair[0]]['paired scans'][pair[1]] = pairData
             self._scanFDict[pair[1]]['paired scans'][pair[0]] = pairData
Example #19
def preprocess_image(digitStruct, dataset="", single=False):
    processed_images = []

    filename = os.path.join(dataset, digitStruct['filename'])
    im = mpimg.imread(filename)

    # Analytics code
    Analytics.load()
    if im.shape[0] > Analytics.max_height[dataset]:
        Analytics.max_height[dataset] = im.shape[0]
    if im.shape[1] > Analytics.max_width[dataset]:
        Analytics.max_width[dataset] = im.shape[1]
    Analytics.save()

    t, l, w, h = find_bounding_box(digitStruct)
    t30, l30, w30, h30 = scale(t, l, w, h)
    w_prime = w + ((w30 - w) / 2)
    h_prime = h + ((h30 - h) / 2)
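    # w_prime/h_prime lie halfway between the original box and the scaled box, giving extra crop sizes below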

    cropped = crop(im, t30, l30, w30, h30)
    processed_images.append(cropped)

    if single == False:
        cropped = crop(im, t30, l30, w_prime, h_prime)
        processed_images.append(cropped)

        cropped = crop(im, t30, l, w_prime, h_prime)
        processed_images.append(cropped)

        cropped = crop(im, t, l30, w_prime, h_prime)
        processed_images.append(cropped)

        cropped = crop(im, t, l, w_prime, h_prime)
        processed_images.append(cropped)

    for i in range(len(processed_images)):
        processed_images[i] = resize(processed_images[i])
        processed_images[i] = grayscale(processed_images[i])
        #Visualize.display_processed_example(processed_images[i])

    return processed_images
Example #20
 def compute_initial_figure(self):
     UsersPerCountry, UsersPerPlatform = Analytics.UsersPerCountryOrPlatform()
     labels = []
     sizes = []
     print(UsersPerPlatform)
     colors = ['turquoise', 'yellowgreen', 'firebrick', 'lightsteelblue', 'royalblue']
     for p, c in sorted(UsersPerPlatform.iteritems()):
         labels.append(p)
         sizes.append(c)
     pylab.pie(sizes, colors=colors, labels=labels, autopct='%1.1f%%', shadow=True)
     pylab.title('Users Per Platform')
     pylab.gca().set_aspect('1')
     pylab.show()
Example #21
def generate_dataset(data, folder, single=False):
    target_size = 64
    if folder == 'test' and not example:
        Analytics.load()
        Analytics.data_set_size[folder] = len(data)
        Analytics.save()
    data_point_per_image = 5
    if single == True:
        data_point_per_image = 1
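    # Each source image contributes data_point_per_image crops, all sharing the same label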
    dataset = np.ndarray(
        [len(data) * data_point_per_image, target_size, target_size, 1],
        dtype='float32')
    labels = np.ones([len(data) * data_point_per_image, 6], dtype=int) * 10
    offset = 0
    for i in np.arange(len(data)):
        processed_images = preprocess_image(data[i], folder, single)
        dataset[offset:offset +
                len(processed_images), :, :, :] = processed_images
        labels[offset:offset + len(processed_images), :] = getLabels(
            data[i], folder)
        offset += len(processed_images)

    print(folder)
    print(np.mean(dataset))
    print(np.std(dataset))

    # Analytics
    #Analytics.load()
    #Analytics.means[folder] = np.mean(dataset)
    #Analytics.stds[folder] = np.std(dataset)
    #Analytics.save()

    #dataset = normalize(dataset)

    print(folder, "dataset created.")
    print(dataset.shape)
    print(labels.shape)
    return dataset, labels
Example #22
 def updatePairInfo(self, event):
     curPairedScanData = self._indexedPairData[int(self._pairedScanListbox.curselection()[0])]
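      # Compute the shared-peaks ratio lazily, only the first time this pair is inspected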
     if curPairedScanData['pair score'] == None:
         curPairedScanData['pair score'] = An.getSharedPeaksRatio(self._scanFDict[curPairedScanData['light']]['dta'], self._scanFDict[curPairedScanData['heavy']]['dta'], self._paramsDict['Pair Configurations'][curPairedScanData['pair configuration']], epsilon=self._ppm * 10**-6 * curPairedScanData['light precmass'])
     for labelDatum in self._pairInfoLabelVars:
         self._pairInfoLabelVars[labelDatum].set(str(curPairedScanData[labelDatum]))
     
     if curPairedScanData['pair score'] != 'N/A':
         if curPairedScanData['pair score'] > self._paircutoff:
             self._pairScoreLabel.config(fg='dark green')
         else:
             self._pairScoreLabel.config(fg='red')
     else:
         self._pairScoreLabel.config(fg='black')
Example #23
def getAccAndPrecForModRefPeptide(modList, newRefEndInds, deNovoSeq, deNovoUnmodSeq, refSeq, alignedIndsMap, deNovoAmbigEdges=[]):
    prevIntervalStart = newRefEndInds['start']
    #print 'End Inds', newRefEndInds
    tempSeq = ''
    tempAmbigEdges = []
    for interval in sorted(modList):
        if 'Isobaric' not in modList[interval][0][0] and not ('Insertion' in modList[interval][0][0] and (alignedIndsMap['De Novo'][interval[0]] < 2 or (len(deNovoUnmodSeq) - alignedIndsMap['De Novo'][interval[1]]) < 2)):
            tempSeq += refSeq[prevIntervalStart:alignedIndsMap['Ref'][interval[0]]] + 'X'
            #print 'Mod list interval', modList[interval]
            tempAmbigEdges += [(0, modList[interval][0][3])]
            #print 'temp ambig edges', tempAmbigEdges
            prevIntervalStart = alignedIndsMap['Ref'][interval[1]]
    #print 'TempSeq', tempSeq, tempAmbigEdges
    tempSeq += refSeq[prevIntervalStart:(len(refSeq) + newRefEndInds['end'])]
    #print deNovoSeq, refSeq, tempSeq, deNovoAmbigEdges, tempAmbigEdges
    comp = An.comparePeptideResults(deNovoSeq, tempSeq, ambigEdges1=deNovoAmbigEdges, ambigEdges2=tempAmbigEdges, ppm=10)
    return comp[0], comp[1]
Example #24
def resetGame():
    '''Resets game variables so player can restart the game quickly.'''
    Player.player.state = 'Restart'
    stopAllAnimation()
    Player.player.gameover = False
    Map.mapvar.getStartPoints()
    Map.mapvar.flylistgenerated = False
    Map.mapvar.flymovelists = []
    Map.mapvar.pointmovelists = []
    Localdefs.towerGroupDict = {'Life': [], 'Fire': [], 'Ice': [], 'Gravity': [], 'Wind': []}
    AllLists = [Localdefs.towerlist, Localdefs.bulletlist, Localdefs.menulist, Localdefs.explosions,
                Localdefs.senderlist, Localdefs.timerlist, Localdefs.shotlist, Localdefs.alertQueue]
    for lst in AllLists:
        while lst:
            lst.pop()
    for tower in Map.mapvar.towercontainer.children:
        if tower.type != 'Base':
            tower.remove()
    Map.mapvar.baseimg = None
    Map.mapvar.towercontainer.clear_widgets()
    Map.mapvar.enemycontainer.clear_widgets()
    for road in Map.mapvar.roadcontainer.children:
        road.iceNeighbor = False
    Map.mapvar.roadcontainer.clear_widgets()
    Map.mapvar.shotcontainer.clear_widgets()
    Map.mapvar.wallcontainer.clear_widgets()
    Map.mapvar.towerdragimagecontainer.clear_widgets()
    Player.player.wavenum = 0
    Player.player.wavetime = int(Map.mapvar.waveseconds)
    Player.player.myDispatcher.Timer = str(Player.player.wavetime)
    Player.player.health = Player.playerhealth
    Player.player.myDispatcher.Health = str(Player.player.health)
    Player.player.score = 0
    Player.player.myDispatcher.Score = str(Player.player.score)
    Player.player.analytics = Analytics.Analytics()
    __main__.ids.wavestreamer.removeWaveStreamer()
    __main__.ids.wavescroller.scroll_x = 0
    __main__.ids.play.text = 'Start'
    if Messenger.messenger.bgrect:
        Map.mapvar.background.canvas.after.remove(Messenger.messenger.bgrect)
        Messenger.messenger.bgrect = None
Example #25
def generate_dataset(data, folder):
    if folder == 'test':
        Analytics.load()
        Analytics.data_set_size[folder] = len(data)
        Analytics.save()
    dataset = np.ndarray([len(data), 50, 50, 1], dtype='float32')
    labels = np.ones([len(data), 6], dtype=int) * 10
    for i in np.arange(len(data)):
        dataset[i, :, :, :] = preprocess_image(data[i], folder)
        labels[i, :] = getLabels(data[i], folder)

    # Analytics
    Analytics.load()
    Analytics.means[folder] = np.mean(dataset)
    Analytics.stds[folder] = np.std(dataset)
    Analytics.save()

    dataset = normalize(dataset)

    print(folder, "dataset created.")
    print(dataset.shape)
    print(labels.shape)
    return dataset, labels
Example #26
def getUniquePeptDict(scanDict, scoreKey, peptideKey, scanKey = 'ScanF', nullVal = 'None', noStrip=['#'], datasets=None):
    if datasets == None:
        datasets = scanDict.keys()

    scanFDict = defaultdict(lambda: dict([(dataset, []) for dataset in datasets]))
    uniquePeptDict = {}

    for dataset in datasets:
        for item in scanDict[dataset]:
            if item[peptideKey] == nullVal:
                continue
            
            strippedPept = An.stripModifications(item[peptideKey], noRemove=noStrip)
            
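            # Keep only the highest-scoring entry for each stripped (modification-free) peptide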
            if strippedPept in uniquePeptDict and float(item[scoreKey]) > float(uniquePeptDict[strippedPept][scoreKey]):
                uniquePeptDict[strippedPept] = item
            elif strippedPept not in uniquePeptDict:
                uniquePeptDict[strippedPept] = item


            scanFDict[strippedPept][dataset] += [item[scanKey]]

    return uniquePeptDict, scanFDict
Example #27
kundenNummer = [0]
lcg = LCG()
timestamp = int(time.time())
openerOperation = [False]
for lamda in numpy.arange(0.5, 10.5, 0.5):
    for r in range(10):
        env = simpy.Environment(8 * 60)
        #Start the simulation at 8:00. That is 8*60 minutes after midnight.
        counters = MR(env, K)
        kundenNummer[0] = 0
        openerOperation[0] = False
        env.process(generate(env))
        env.process(counterOpener(env))
        env.run(until=16 * 60)
        #Let the simulation run until 16:00. That is 16*60 minutes after midnight.
        Analytics.storeRun(lamda, r + 1, kundenNummer[0])
        #Display the mean values.
        print("mean number of waiting customers: %f" % Analytics.meanWaits())
        print("mean waiting time: %f" % Analytics.meanWaittimePerCustomer())
        print("mean time in the system: %f" %
              Analytics.meanTotaltimePerCustomer())
        print("mean queue length: %f" %
              Analytics.meanAverageQueueLength())
        #Create, display, and save graphs.
        #Analytics.createWaitAtPointGraph();
        #Analytics.createWaittimePerCustomerGraph();
        #Analytics.createTotaltimePerCustomerGraph();
        #Export the data in CSV format.
        #Analytics.exportWaitsAtPoint("wartende_Kunden%i.csv"%timestamp);
        #Analytics.exportTotaltimePerCustomer("Verweilzeiten%i.csv"%timestamp);
        #Analytics.exportWaittimePerCustomer("Wartezeiten%i.csv"%timestamp);
Example #28
        unimodDict = pickle.load(fin)
    hashedUnimodDict = hashUnimodDict(unimodDict)

    outFile = open(options.output, 'w')
    cols = ['ScanF', 'Score', 'Peptide', 'Unmod Peptide', 'References', 'Modifications', 'DB Peptide', 'Alignment Score']
    if 'Ambig Edges' in infoDict:
        cols.insert(2, 'Ambig Edges')
        
    outFile.write('\t'.join([col for col in cols]) + '\n')

    for entry in DataFile.getScanInfo(options.comp, delimiter='\t'):
        scanData = {}
        print "New scan", entry
        scanData['ScanF'] = entry[infoDict['ScanF']]
        scanData['Peptide'] = entry[infoDict['Peptide']]
        scanData['Unmod Peptide'] = An.stripModifications(scanData['Peptide'])
        scanData['Score'] = entry[infoDict['Score']]
        scanData['Alignment Score'] = None
        if 'Ambig Edges' in infoDict:
            ambigEdges = eval(entry[infoDict['Ambig Edges']])
            scanData['Ambig Edges'] = ambigEdges
        else:
            ambigEdges = []

        massIntPairs = DataFile.getMassIntPairs(scanFDict[int(scanData['ScanF'])]['dta'])
        spec = PN.Spectrum(PNet, precMass, epsilon=2*epSTD, spectrum=massIntPairs)

        try:
            #Ignore de novo peptides with noncanonical amino acids for now
            epsilon = 2 * 10**-6 * options.ppmstd * An.getPRMLadder(scanData['Peptide'], ambigEdges=ambigEdges)[-1]
        except KeyError:
Example #29
    dtaList = glob.glob(options.dtadir + '/*.dta')
    scanFDict = getScanFDict(dtaList)
    
    aas = Constants.addPepsToAADict(300)
    hashedAAs = Constants.hashAAsEpsilonRange(aas, epStep, maxEp)
    
    ambigOpenPenalty = 0
    ambigPenaltyFun = DNS.getAmbigEdgePenaltyFunction(options.minedge, ambigOpenPenalty, options.ambigpenalty)
    ppmPenaltyFun = DNS.getPPMPenaltyFun(options.ppmstd, hashedAAs, options.minedge, options.ppmpenalty, options.ppmsyserror, epStep)
    
    print 'Getting Clusters'
    parent = os.path.abspath(os.pardir)
    clusterSVMModel = svmutil.svm_load_model(parent + paramsDict['Cluster Configuration']['model'])
    clusterSVMRanges = svmutil.load_ranges(parent + os.path.splitext((paramsDict['Cluster Configuration']['model']))[0] + '.range')

    precMassClusters = Analytics.findSamePrecMassClusters(dtaList, ppm=options.ppmstd)
#    print 'precMassClusters', precMassClusters                                                                                                                                                                      
    samePeptideClusters = Analytics.getSamePeptideClusters(precMassClusters, scanFDict, clusterSVMModel, clusterSVMRanges, ppmSTD=options.ppmstd, cutOff=float(paramsDict['Cluster Configuration']['cutoff']))
#    samePeptideClusters = Analytics.getSamePeptideClusters(precMassClusters, scanFDict, clusterSVMModel, clusterSVMRanges, ppmSTD=options.ppmstd, cutOff=4)
#    samePeptideClusters = An.getSamePeptideClusters(precMassClusters, scanFDict, clusterSVMModel, clusterSVMRanges, ppmSTD=options.ppmstd, cutOff=4)

    # To test without any clustering
    #samePeptideClusters = [[scanF] for scanF in scanFDict]
    
    for pairConfigName in paramsDict['Pair Configurations']:
        
        print 'Getting heavy-light pairs for %s' % (pairConfigName,)
        t1 = time.time()

        pairConfig = paramsDict['Pair Configurations'][pairConfigName]
        pairs = Analytics.findDeltaPairsClusters(samePeptideClusters, scanFDict, pairConfig['NMod']+pairConfig['CMod'], ppm=options.ppmstd)
Example #30
    def do_training(self):
        self.train_accs = []
        self.valid_accs = []
        print("Training")
        with self.sess.as_default():
            start = time.time()
            for epoch in range(self.epochs):
                offset = 0
                #epoch_loss = 0
                while offset < self.train_dataset.shape[0]:
                    X_batch, y_batch = load_minibatch(
                        self.train_dataset,
                        self.train_labels,
                        offset,
                        self.batch_size,
                    )
                    _, l, summary = self.sess.run(
                        [self.train_op, self.loss, self.summary_op],
                        feed_dict={
                            self.x: X_batch,
                            self.y: y_batch,
                            self.keep_prob: 0.5,
                            self.phase_train: True
                        })
                    #epoch_loss += l * X_batch.get_shape().as_list()[0]
                    offset = min(offset + self.batch_size,
                                 self.train_dataset.shape[0])
                train_pred = self.prediction.eval(
                    feed_dict={
                        self.x: X_batch,
                        self.y: y_batch,
                        self.keep_prob: 1.0,
                        self.phase_train: False
                    })

                train_acc = accuracy(train_pred, y_batch[:, 1:6])
                print("Training Accuracy", train_acc, "at Epoch:", epoch)
                self.train_accs.append([epoch, train_acc])

                offset = 0
                valid_acc = 0
                count = 0
                while offset < self.valid_dataset.shape[0]:
                    X_batch, y_batch = load_minibatch(self.valid_dataset,
                                                      self.valid_labels,
                                                      offset, self.batch_size)
                    valid_pred = self.prediction.eval(
                        feed_dict={
                            self.x: X_batch,
                            self.y: y_batch,
                            self.keep_prob: 1.0,
                            self.phase_train: False
                        })
                    temp_valid_acc = accuracy(valid_pred, y_batch[:, 1:6])
                    old_offset = offset
                    offset = min(offset + self.batch_size,
                                 self.valid_dataset.shape[0])
                    count += 1
                    #valid_acc = ((valid_acc * old_offset) +
                    #			 (temp_valid_acc * self.batch_size)/offset)
                    valid_acc += temp_valid_acc
                valid_acc = valid_acc / count
                self.valid_accs.append([epoch, valid_acc])
                print("Valididation Accuracy", valid_acc, "at Epoch:", epoch)

                save_path = self.saver.save(self.sess, self.savepath)
                self.writer.add_summary(summary, epoch)

                # Graph of accuracies
                self.graph.update([x[0] for x in self.train_accs],
                                  [y[1] for y in self.train_accs], "b",
                                  "Training")
                self.graph.update([x[0] for x in self.valid_accs],
                                  [y[1] for y in self.valid_accs], "r",
                                  "Validation")
                #if epoch == 0:
                #	self.graph.addLegend()
            print("Evaluating Test Dataset")
            offset = 0
            test_acc = 0
            count = 0
            while offset < self.test_dataset.shape[0]:
                X_batch, y_batch = load_minibatch(self.test_dataset,
                                                  self.test_labels, offset,
                                                  self.batch_size)
                test_pred = self.prediction.eval(
                    feed_dict={
                        self.x: X_batch,
                        self.y: y_batch,
                        self.keep_prob: 1.0,
                        self.phase_train: False
                    })
                temp_test_acc = accuracy(test_pred, y_batch[:, 1:6])
                old_offset = offset
                offset = min(offset + self.batch_size,
                             self.test_dataset.shape[0])
                count += 1
                #test_acc = ((test_acc * old_offset) +
                #			(temp_test_acc * self.batch_size)/offset)
                test_acc += temp_test_acc
            test_acc = test_acc / count
            print("Test accuracy: %.1f%%" % (test_acc))

            Analytics.train_time = time.time() - start
            Analytics.train_accuracy = train_acc
            Analytics.valid_accuracy = valid_acc
            Analytics.test_accuracy = test_acc
            Analytics.display()
            Analytics.save()
            plt.show()
Example #31
def compareSequences(deNovoPep, deNovoUnmodPep, refPep, hashedUnimodDict, unimodDict, paramsDict, deNovoAmbigEdges = [], epsilon = 0.02):
    if 'X' in refPep:
        refPep = refPep.translate(None, 'X')

    # KLUDGE: REMOVE WHEN REWRITE
    #deNovoPep = An.stripModifications(deNovoPep, noRemove=['#', '*'])
    
    alignment = getAlignment(deNovoUnmodPep, refPep, AAMap, scoreMatrix)
    alignedIndsMap = getAlignedIndsMap(alignment)
    

    disagreeArr = [1 if alignment[0][i] == alignment[1][i] else 0 for i in range(len(alignment[0]))]
    intervals = getConnectedDisagreementRegions(disagreeArr)

    try:
        refPRMLadder = An.getPRMLadder(refPep)
    except KeyError:
        return None
    
    deNovoPRMLadder = An.getPRMLadder(deNovoPep, ambigEdges=deNovoAmbigEdges)

    allResolved = True
    modList = {}
    newRefEndInds = {'start': 0, 'end': 0}

    # rough check of whether or not intervals can be easily explained
    for interval in intervals:
        deNovoSubSeq = deNovoUnmodPep[alignedIndsMap['De Novo'][interval[0]]:alignedIndsMap['De Novo'][interval[1]]]
        refSubSeq = refPep[alignedIndsMap['Ref'][interval[0]]:alignedIndsMap['Ref'][interval[1]]]

        if alignedIndsMap['De Novo'][interval[0]] == 0:
            term = 'N-term'
        elif alignedIndsMap['De Novo'][interval[1]] == len(deNovoUnmodPep):
            term = 'C-term'
        else:
            term = None

        if deNovoSubSeq != '' and refSubSeq != '':
            deNovoMass = deNovoPRMLadder[alignedIndsMap['De Novo'][interval[1]]] - deNovoPRMLadder[alignedIndsMap['De Novo'][interval[0]]]
            if term == None:
                refMass = refPRMLadder[alignedIndsMap['Ref'][interval[1]]] - refPRMLadder[alignedIndsMap['Ref'][interval[0]]]
                modList[interval] = resolveInterval(refMass, deNovoMass, refSubSeq, hashedUnimodDict, unimodDict, paramsDict, term=term, epsilon=epsilon), deNovoSubSeq, refSubSeq
            else:
                minSizedMod = ((None, None, 10000000,),)
                for i in range(len(refSubSeq)):
                    if term == 'N-term':
                        refMass = refPRMLadder[alignedIndsMap['Ref'][interval[1]]] - refPRMLadder[alignedIndsMap['Ref'][interval[0]] + i]
                        subRefSubSeq = refSubSeq[i:]
                    else:
                        refMass = refPRMLadder[alignedIndsMap['Ref'][interval[1]] - i] - refPRMLadder[alignedIndsMap['Ref'][interval[0]]]
                        subRefSubSeq = refSubSeq[:len(refSubSeq) - i]  # refSubSeq[:-i] would be empty when i == 0
                    mod = resolveInterval(refMass, deNovoMass, subRefSubSeq, hashedUnimodDict, unimodDict, paramsDict, term=term, epsilon=epsilon)
                    if 'TX' in deNovoUnmodPep:
                        print deNovoSubSeq, refSubSeq, subRefSubSeq, mod
                    if (abs(minSizedMod[0][2]) > abs(mod[2]) and (minSizedMod[0][0] == None or 'Isobaric' not in minSizedMod[0][0])) or 'Isobaric' in mod[0]:
                        if mod[1] != None or (mod[1] == None and minSizedMod[0][1] == None) or ('Isobaric' in mod[0] and 'Isobaric' not in minSizedMod[0][0]):
                            minSizedMod = mod, deNovoSubSeq, subRefSubSeq
                            if term == 'N-term':
                                newRefEndInds['start'] = i
                            else:
                                newRefEndInds['end'] = -i
                modList[interval] = minSizedMod
                    
        else:
            # Make sure that lack of sequence is due to overhang of reference peptide
            if alignedIndsMap['De Novo'][interval[1]] == 0:
                newRefEndInds['start'] = len(refSubSeq)
            elif alignedIndsMap['De Novo'][interval[0]] == len(deNovoUnmodPep):
                newRefEndInds['end'] = -len(refSubSeq)
#            elif term != None:
#                raise ValueError('Not enough reference sequence provided for resoluton of terminal discrepancies. De Novo: %s, Reference %s' % (deNovoPep, refPep))
            elif term == None:
                if deNovoSubSeq == '':
                    refMass = refPRMLadder[alignedIndsMap['Ref'][interval[1]]] - refPRMLadder[alignedIndsMap['Ref'][interval[0]]]
                    modList[interval] = ('Deletion', refMass, 0, -refMass), deNovoSubSeq, refSubSeq
                else:
                    deNovoMass = deNovoPRMLadder[alignedIndsMap['De Novo'][interval[1]]] - deNovoPRMLadder[alignedIndsMap['De Novo'][interval[0]]]
                    modList[interval] = ('Insertion', deNovoMass, 0, deNovoMass), deNovoSubSeq, refSubSeq

    #print 'Mod List: ', modList
    acc, prec =  getAccAndPrecForModRefPeptide(modList, newRefEndInds, deNovoPep, deNovoUnmodPep, refPep, alignedIndsMap, deNovoAmbigEdges)
    
    return modList, newRefEndInds, alignment, acc, prec
Example #32
def getSpectrumAndPSMFeatureDict(LADSSeqInfo, seqEntry, scanFDict, pairConfig, PNet):

    featureList = []
    lightScans = seqEntry[0]
    heavyScans = seqEntry[1]
    
    lightSpecs = [DataFile.getMassIntPairs(scanFDict[int(lightScanF)]['dta']) for lightScanF in lightScans]
    heavySpecs = [DataFile.getMassIntPairs(scanFDict[int(heavyScanF)]['dta']) for heavyScanF in heavyScans]
    avgLightPrecMass = np.average(np.array([scanFDict[lightScanF]['precMass'] for lightScanF in lightScans]))
    
    epSTD = options.ppmstd * 10**-6 * avgLightPrecMass
    
    specs = []
    for i, massIntPairs in enumerate(lightSpecs):
        specs += [PN.Spectrum(PNet, scanFDict[lightScans[i]]['precMass'], Nmod=0.0, Cmod=0.0, epsilon=2*epSTD, spectrum=massIntPairs)]
    for i, massIntPairs in enumerate(heavySpecs):
        specs += [PN.Spectrum(PNet, scanFDict[heavyScans[i]]['precMass'], Nmod=pairConfig['NMod'], Cmod=pairConfig['CMod'], epsilon=2*epSTD, spectrum=massIntPairs)]
    for spec in specs:
        spec.initializeNoiseModel()
                                                                                                                                                    
    clusterPairingStats = Discriminator.getClusterPairingStats(lightSpecs, heavySpecs, avgLightPrecMass, pairConfig, epSTD=epSTD)
    GLFD.addClusterPairingStatsToFeatureList(clusterPairingStats, featureList)

    scoreStats = {}
    truePMs = {}
    prmLadders = {}
    for PSM in LADSSeqInfo[seqEntry]:
        lightSeq = An.preprocessSequence(PSM[1], seqMap, ambigEdges=PSM[2])
        scoreStats[PSM[:2]] = Discriminator.getScoreStats(specs, lightSeq, ambigEdges=PSM[2])

        prmLadderWithEnds = An.getPRMLadder(lightSeq, ambigEdges=PSM[2], addEnds=True)
        truePMs[PSM[:2]] = prmLadderWithEnds[-1]
        prmLadders[PSM[:2]] = prmLadderWithEnds[1:-1]
        
    PSMList = scoreStats.keys()
    spectrumOrderedScoreStats, clusterScoreStats = GLFD.compileScoreStats(scoreStats, specs, PSMList)

    spectrumAndPSMSpecificFeatureDict = {}
        
    PSMIndexDict = dict([(PSM, i) for i, PSM in enumerate(PSMList)])
    for i, PSM in enumerate(LADSSeqInfo[seqEntry]):
        PSMSpecificFeatureList = copy.copy(featureList)

        peptLength = len(prmLadders[PSM[:2]]) + 1

        # Add LADS PScore (and normalized variants)  and delta rank, delta score (LADS PScore) to feature list
        PSMSpecificFeatureList += [PSM[0], PSM[0]/peptLength, PSM[0]/len(specs), -i, PSM[0]-LADSSeqInfo[seqEntry][0][0]]
        # Add Total Path Score (and normalized variants) and delta rank, delta score (total path score)  and total minimum node score to feature list
        totalPathScore = scoreStats[PSM[:2]]['Total Path Score']
        PSMSpecificFeatureList += [totalPathScore, totalPathScore/peptLength, totalPathScore/len(specs), -clusterScoreStats['PSM Rankings'][PSMIndexDict[PSM[:2]]], totalPathScore-clusterScoreStats['Max Cluster Path Score'], scoreStats[PSM[:2]]['Total Minimum Node Score']]
        
        # Add minimum path score, maximum path score, (and normalized variants) and minimum score/maximum score for cluster to feature list
        PSMSpecificFeatureList += [scoreStats[PSM[:2]]['Minimum Path Score'], scoreStats[PSM[:2]]['Minimum Path Score']/peptLength, scoreStats[PSM[:2]]['Maximum Path Score'], scoreStats[PSM[:2]]['Maximum Path Score']/peptLength, scoreStats[PSM[:2]]['Minimum Path Score']/scoreStats[PSM[:2]]['Maximum Path Score']]
        
        # Add difference between minimum and maximum ranking for PSM across cluster to feature list
        rankingsForPSM = [spectrumOrderedScoreStats[i]['PSM Rankings'][PSMIndexDict[PSM[:2]]] for i in spectrumOrderedScoreStats]
        PSMSpecificFeatureList += [min(rankingsForPSM) - max(rankingsForPSM)]
        
        #Add Number forbidden node pairs (and normalized variants) to feature list
        numForbiddenPairs = Discriminator.getNumForbiddenPairs(prmLadders[PSM[:2]], avgLightPrecMass)
        PSMSpecificFeatureList += [numForbiddenPairs, 2.0*numForbiddenPairs/(peptLength-1)]

        # Add number of ambiguous edges to feature list
        PSMSpecificFeatureList += [len(PSM[2])]
        
        # Add stats for PRM Evidence over cluster (and normalized variants) to feature list
        PSMSpecificFeatureList += [scoreStats[PSM[:2]]['Aggregate PRM Score Statistics']['All Evidence'], scoreStats[PSM[:2]]['Aggregate PRM Score Statistics']['All Evidence']/float(peptLength-1), scoreStats[PSM[:2]]['Aggregate PRM Score Statistics']['Majority Evidence'], scoreStats[PSM[:2]]['Aggregate PRM Score Statistics']['Majority Evidence']/float(peptLength-1), scoreStats[PSM[:2]]['Aggregate PRM Score Statistics']['None Evidence'], scoreStats[PSM[:2]]['Aggregate PRM Score Statistics']['None Evidence']/float(peptLength-1)]

        # Add stats for paired PRMs and their corresponding ion types to feature list
        pairedPRMStats = Discriminator.getPairedPRMStats(prmLadders[PSM[:2]], clusterPairingStats['Light Merged Spec'], clusterPairingStats['Heavy Merged Spec'], lightSpecs, heavySpecs, clusterPairingStats['Cluster Paired PRM Information'], epSTD=epSTD)
        GLFD.addPairedPRMStatsToFeatureList(pairedPRMStats, PSMSpecificFeatureList, len(prmLadders[PSM[:2]]))

        pairedPRMLadder = pairedPRMStats['Paired PRM Ladder']        
    
        for i, scan in enumerate(lightScans):
            spectrumSpecificFeatureList = copy.copy(PSMSpecificFeatureList)
            # Add path score (and normalized variants), delta rank, delta score, number of negative PRMs, and minimum node score for spectrum to feature list
            pathScore = spectrumOrderedScoreStats[i]['Path Scores'][PSMIndexDict[PSM[:2]]]
            numNegativePRMs = spectrumOrderedScoreStats[i]['Num Negative PRMs'][PSMIndexDict[PSM[:2]]]
            spectrumSpecificFeatureList += [pathScore, pathScore/peptLength, pathScore/scoreStats[PSM[:2]]['Maximum Path Score'], -spectrumOrderedScoreStats[i]['PSM Rankings'][PSMIndexDict[PSM[:2]]], spectrumOrderedScoreStats[i]['Delta Scores'][PSMIndexDict[PSM[:2]]], numNegativePRMs, numNegativePRMs/float(peptLength-1), spectrumOrderedScoreStats[i]['Min Node Scores'][PSMIndexDict[PSM[:2]]]]
            
            # Add mass deviation from true peptide mass to feature list
            precMass = scanFDict[scan]['precMass']
            spectrumSpecificFeatureList += [abs(truePMs[PSM[:2]] + Constants.mods['H2O'] + Constants.mods['H+'] - precMass)]
        
            peakAnnotationMassOffsetStats = Discriminator.getPeakAnnotationAndMassOffsetStats(DataFile.getMassIntPairs(scanFDict[scan]['dta']), specs[i], prmLadders[PSM[:2]], pairedPRMLadder, PNet)
            GLFD.addPeakAnnotationStatsToFeatureList(PNet, peakAnnotationMassOffsetStats, spectrumSpecificFeatureList, peptLength)
            GLFD.addMassOffsetStatsToFeatureList(peakAnnotationMassOffsetStats, spectrumSpecificFeatureList)
        
            spectrumSpecificFeatureList += [precMass, GLFD.getChargeStateFromDTAFName(scanFDict[scan]['dta']), peptLength]
            spectrumAndPSMSpecificFeatureDict[(scan, PSM[:2])] = spectrumSpecificFeatureList

        for j, scan in enumerate(heavyScans):
            i = j + len(lightScans)
            
            spectrumSpecificFeatureList = copy.copy(PSMSpecificFeatureList)
            # Add path score (and normalized variants), delta rank, delta score, number of negative PRMs, and minimum node score for spectrum to feature list
            pathScore = spectrumOrderedScoreStats[i]['Path Scores'][PSMIndexDict[PSM[:2]]]
            numNegativePRMs = spectrumOrderedScoreStats[i]['Num Negative PRMs'][PSMIndexDict[PSM[:2]]]
            spectrumSpecificFeatureList += [pathScore, pathScore/peptLength, pathScore/scoreStats[PSM[:2]]['Maximum Path Score'], -spectrumOrderedScoreStats[i]['PSM Rankings'][PSMIndexDict[PSM[:2]]], spectrumOrderedScoreStats[i]['Delta Scores'][PSMIndexDict[PSM[:2]]], numNegativePRMs, numNegativePRMs/float(peptLength-1), spectrumOrderedScoreStats[i]['Min Node Scores'][PSMIndexDict[PSM[:2]]]]
            
            # Add mass deviation from true peptide mass to feature list
            precMass = scanFDict[scan]['precMass']
            spectrumSpecificFeatureList += [abs(truePMs[PSM[:2]] + pairConfig['NMod'] + pairConfig['CMod'] + Constants.mods['H2O'] + Constants.mods['H+'] - precMass)]
            
            peakAnnotationMassOffsetStats = Discriminator.getPeakAnnotationAndMassOffsetStats(DataFile.getMassIntPairs(scanFDict[scan]['dta']), specs[i], prmLadders[PSM[:2]], pairedPRMLadder, PNet)
            GLFD.addPeakAnnotationStatsToFeatureList(PNet, peakAnnotationMassOffsetStats, spectrumSpecificFeatureList, peptLength)
            GLFD.addMassOffsetStatsToFeatureList(peakAnnotationMassOffsetStats, spectrumSpecificFeatureList)
            
            spectrumSpecificFeatureList += [precMass, GLFD.getChargeStateFromDTAFName(scanFDict[scan]['dta']), peptLength]
            spectrumAndPSMSpecificFeatureDict[(scan, PSM[:2])] = spectrumSpecificFeatureList

    return spectrumAndPSMSpecificFeatureDict
Example #33
            specs = []
            for i, massIntPairs in enumerate(lightSpecs):
                specs += [PN.Spectrum(PNet, scanFDict[lightScans[i]]['precMass'], Nmod=0.0, Cmod=0.0, epsilon=2*epSTD, spectrum=massIntPairs)]
            for i, massIntPairs in enumerate(heavySpecs):
                specs += [PN.Spectrum(PNet, scanFDict[heavyScans[i]]['precMass'], Nmod=pairConfig['NMod'], Cmod=pairConfig['CMod'], epsilon=2*epSTD, spectrum=massIntPairs)]
            for spec in specs:
                spec.initializeNoiseModel()

            clusterPairingStats = Discriminator.getClusterPairingStats(lightSpecs, heavySpecs, avgLightPrecMass, pairConfig, epSTD=epSTD)
            addClusterPairingStatsToFeatureList(clusterPairingStats, featureList)
            
            scoreStats = {}
            truePMs = {}
            prmLadders = {}
            for PSM in LADSSeqInfo[seqEntry]:
                lightSeq = An.preprocessSequence(PSM[1], seqMap, ambigEdges=PSM[2])
                scoreStats[PSM[:2]] = Discriminator.getScoreStats(specs, lightSeq, ambigEdges=PSM[2])

                prmLadderWithEnds = An.getPRMLadder(lightSeq, ambigEdges=PSM[2], addEnds=True)
                truePMs[PSM[:2]] = prmLadderWithEnds[-1]
                prmLadders[PSM[:2]] = prmLadderWithEnds[1:-1]
            
            PSMList = scoreStats.keys()
            spectrumOrderedScoreStats, clusterScoreStats = compileScoreStats(scoreStats, specs, PSMList)

            PSMIndexDict = dict([(PSM, i) for i, PSM in enumerate(PSMList)])
            for i, PSM in enumerate(LADSSeqInfo[seqEntry]):
                PSMSpecificFeatureList = copy.copy(featureList)
                lightSeq = An.preprocessSequence(PSM[1], seqMap, ambigEdges=PSM[2])
                heavySeq = An.preprocessSequence(PSM[1], heavySeqMaps['silac_light_heavy'], replaceExistingTerminalMods=True, ambigEdges=PSM[2])
                
Example #34
def getAlignment(deNovoPept, dbPept, AAMap, scoreMatrix, gapOpenPen=-5, gapExtendPen=0):
    alignment = An.alignSequences(deNovoPept, dbPept, AAMap, scoreMatrix, gapOpenPen, gapExtendPen)[1][0]
    return alignment
Example #35
    def do_training(self):
        self.train_accs = []
        self.valid_accs = []
        print("Training")
        with self.sess.as_default():
            start = time.time()
            for step in range(self.num_steps):
                offset = ((step * BATCH_SIZE) %
                          (self.train_labels.shape[0] - BATCH_SIZE))
                batch_data = self.train_dataset[offset:(offset +
                                                        BATCH_SIZE), :, :, :]
                batch_labels = self.train_labels[offset:(offset +
                                                         BATCH_SIZE), :]
                feed_dict = {
                    self.x: batch_data,
                    self.y: batch_labels,
                    self.keep_prob: .9375
                }
                _, l, summary = self.sess.run(
                    [self.train_op, self.loss, self.summary_op],
                    feed_dict=feed_dict)

                if (step % 500 == 0):
                    print("Minibatch loss at step %d: %f, %f" % (step, l, l2))
                    train_pred = self.prediction.eval(
                        feed_dict={
                            self.x: batch_data,
                            self.y: batch_labels,
                            self.keep_prob: 1.0
                        })
                    train_acc = accuracy(train_pred, batch_labels[:, 1:6])
                    self.train_accs.append([step, train_acc])
                    print("Minibatch accuracy: %.1f%%" % train_acc)
                    valid_pred = self.prediction.eval(
                        feed_dict={
                            self.x: self.valid_dataset,
                            self.y: self.valid_labels,
                            self.keep_prob: 1.0
                        })
                    valid_acc = accuracy(valid_pred, self.valid_labels[:, 1:6])
                    self.valid_accs.append([step, valid_acc])
                    print("Validation accuracy: %.1f%%" % valid_acc)
                    save_path = self.saver.save(self.sess, self.savepath)
                    self.writer.add_summary(summary, step)
            # Graph of accuracies
            plt.plot([x[0] for x in self.train_accs],
                     [y[1] for y in self.train_accs], 'b',
                     [a[0] for a in self.valid_accs],
                     [b[1] for b in self.valid_accs], 'r')
            plt.title("Training and Validation Accuracy")

            test_pred = self.prediction.eval(feed_dict={
                self.x: self.test_dataset,
                self.keep_prob: 1.0
            })
            test_acc = accuracy(test_pred, self.test_labels[:, 1:6])
            print("Test accuracy: %.1f%%" % test_acc)

            Analytics.train_time = time.time() - start
            Analytics.train_accuracy = train_acc
            Analytics.valid_accuracy = valid_acc
            Analytics.test_accuracy = test_acc
            Analytics.display()
            Analytics.save()
            plt.show()
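
The training loop above assumes an accuracy() helper that is not shown. A minimal sketch, assuming the predictions are per-digit softmax outputs of shape (batch, 5, num_classes) and the labels hold the five digit classes (the batch_labels[:, 1:6] slice in the caller); the real helper's signature may differ:

import numpy as np

def accuracy(predictions, labels):
    # Hypothetical helper assumed by do_training() above.
    # predictions: (batch, 5, num_classes) softmax scores, one per digit position.
    # labels:      (batch, 5) integer digit classes.
    # Returns the percentage of individual digit positions predicted correctly.
    return 100.0 * np.mean(np.argmax(predictions, axis=2) == labels)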
log ("Request type: {0}.".format(request['type']))
if request['type'] == 'IntentRequest':
    log ("Intent name: {0}.".format(request['intent']['name']))
    #######################################################
    #         Sensor Reading Request                      #
    #######################################################
    if request['intent']['name'] == 'SensorStatusIntent':
        log ("Sensor value: {0}.".format(request['intent']['slots']['Sensor_Selection']['value']))
        sensor_data = request['intent']['slots']['Sensor_Selection']['value']
        log (sensor_data)
        if sensor_data in ['temperature', 'pressure']:
            if sensor_data == 'temperature':
                tag_name = "raw.temp3.avg"
            else: 
                tag_name = "raw.pressure.avg"
            val = Analytics.last_n_values(tag_name,1)
            if val:
                response_txt = "The most recent " + sensor_data + " is " + str(int(val[0][tag_name]))
                log (response_txt)
            else:
                response_txt = "I could not find any recent " + sensor_data + " values"
        else:
            log ("Error: Not temperature or pressure.")
            response_txt = "Sorry, I can only report temperature or pressure."
    #######################################################
    #         LED Control Request                         #
    #######################################################
    elif request['intent']['name'] == 'ledControlIntent' or request['intent']['name'] == 'ledBlinkIntent':
        log ("Intent value: {0}.".format(request['intent']['slots']['led_Selection']['value']))
        led = request['intent']['slots']['led_Selection']['value']
        log (led)
        #
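
The handler above builds response_txt, but the part of the Lambda that returns it is cut off. A sketch of a standard Alexa Skills Kit response envelope that such a handler might return (an assumption; the original's actual return value is not shown):

def build_response(response_txt, end_session=True):
    # Hypothetical wrap-up for the intent handler above: package the spoken
    # text in the Alexa Skills Kit response format.
    return {
        'version': '1.0',
        'response': {
            'outputSpeech': {'type': 'PlainText', 'text': response_txt},
            'shouldEndSession': end_session
        }
    }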
def alignDeNovoToDBSequence(deNovoPeptWithMods, deNovoPept, dbPept, spec, hashedUnimodDict, unimodDict, paramsDict, deNovoAmbigEdges = None, tagLength=2, isobaricPenalty=-0.5, defModPenalty=-1, inDelPenalty=-2, undefModPenalty=-3, defaultScore=0):
    deNovoPRMLadder = An.getPRMLadder(deNovoPeptWithMods, ambigEdges = deNovoAmbigEdges, addEnds=True)
    #print deNovoPRMLadder

    dbPRMLadder = An.getPRMLadder(dbPept, addEnds=True)

    startTags, endTags = generateStartAndEndTags(deNovoPept, dbPept)
    sequenceTags = generateSequenceTags(deNovoPept, dbPept, tagLength=tagLength)

    tagGraph = getSequenceTagGraph(startTags, endTags, sequenceTags)

    maxScore = None
    maxScoringTag = None
    
    #print sorted(tagGraph.nodes(data=True))
    #print sorted(tagGraph.edges(data=True))
    for tag in nx.topological_sort(tagGraph):
        if tagGraph.node[tag]['position'] == 'internal':
            nodeScore = getScoreFromPRMs(spec, deNovoPRMLadder[tag[0][0]:tag[0][1]+1], deNovoTerm = getDeNovoTerm(tag, len(deNovoPept)))
        else:
            nodeScore = 0
            
        #print 'Tag', tag
        for prevTag in tagGraph.predecessors(tag):
            nModSymbol = None
            # Define terminus of peptide for modification annotation
            if tagGraph.node[prevTag]['position'] == 'start':
                term = 'N-term'
            elif tagGraph.node[tag]['position'] == 'end':
                term = 'C-term'
            else:
                term = None

            
            refMass = dbPRMLadder[tag[1][0]] - dbPRMLadder[prevTag[1][1]]
            deNovoMass = deNovoPRMLadder[tag[0][0]] - deNovoPRMLadder[prevTag[0][1]]
            refSubSeq = dbPept[prevTag[1][1]:tag[1][0]]
            deNovoSubSeq = deNovoPept[prevTag[0][1]:tag[0][0]]

            mods = resolveInterval(refMass, deNovoMass, refSubSeq, deNovoSubSeq, hashedUnimodDict, unimodDict, paramsDict, term=term, nModSymbol=nModSymbol)

            modPenalty = 0
            modScore = 0

            if len(mods) > 0:
                if 'Isobaric Substitution' == mods[0][0]:
                    modPenalty = isobaricPenalty
                    modScore = getTagScore(spec, refSubSeq, startMass= deNovoPRMLadder[prevTag[0][1]], deNovoTerm = None, addTerminalNodes=False, verbose=True)
                    print modScore, refSubSeq
                elif 'Insertion' == mods[0][0]:
                    modPenalty = inDelPenalty
                    modScore = getScoreFromPRMs(spec, deNovoPRMLadder[prevTag[0][1]:tag[0][0]+1], deNovoTerm = None, addTerminalNodes=False)

                elif 'Deletion' == mods[0][0]:
                    modPenalty = inDelPenalty * len(deNovoSubSeq)

                elif 'Undefined Mass Shift' == mods[0][0]:
                    modPenalty = undefModPenalty

                    modPepts = getModPeptides(mods[0], refSubSeq, term, unimodDict)
                    modScores = []
                    for pept in modPepts:
                        modScores += [(getTagScore(spec, pept[0], startMass= deNovoPRMLadder[prevTag[0][1]], ambigEdges=pept[1], deNovoTerm = None, addTerminalNodes=False), pept)]

                    modScore, modPept = max(modScores)
                    mods = (mods[0][:-1] + (modPept[0],),)
                else:
                    modPenalty = defModPenalty
                    
                    modScores = []
                    for modData in mods:
                        modPepts = getModPeptides(modData, refSubSeq, term, unimodDict)
                        for pept in modPepts:
                            modScores += [(getTagScore(spec, pept[0], startMass= deNovoPRMLadder[prevTag[0][1]], ambigEdges=pept[1], deNovoTerm = None, addTerminalNodes=False), (modData, pept))]
                    modScore, modPept = max(modScores)
                    mods = (modPept[0][:-1] + (modPept[1][0],),)

            tagGraph.edge[prevTag][tag]['edgeScore'] = nodeScore + modScore + modPenalty
            tagGraph.edge[prevTag][tag]['mods'] = mods

            if 'score' not in tagGraph.node[prevTag]:
                tagGraph.node[prevTag]['score'] = defaultScore

            try:
                tagGraph.node[tag]['score'] = max(tagGraph.node[tag]['score'], tagGraph.node[prevTag]['score'] + tagGraph.edge[prevTag][tag]['edgeScore'])
            except KeyError:
                tagGraph.node[tag]['score'] = tagGraph.node[prevTag]['score'] + tagGraph.edge[prevTag][tag]['edgeScore']

            if tagGraph.node[tag]['position'] == 'end' and tagGraph.node[tag]['score'] > maxScore:
                maxScore = tagGraph.node[tag]['score']
                maxScoringTag = tag

    if maxScoringTag != None:
        return getBestAlignment(tagGraph, dbPept, maxScore, maxScoringTag)
    else:
        return None, None, None
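
Condensed, the scoring pass above is a longest-path dynamic program over the tag graph: each node's score is the best predecessor score plus the precomputed edge score, and the highest-scoring 'end' node seeds the traceback. A minimal sketch of just that recurrence, using the same pre-2.0 networkx attribute API and Python 2 None-comparison behaviour as the original:

import networkx as nx

def best_end_tag(tagGraph, defaultScore=0):
    # Sketch only; the full function above also computes the edge scores.
    maxScore, maxScoringTag = None, None  # None compares low in Python 2, as in the original
    for tag in nx.topological_sort(tagGraph):
        for prevTag in tagGraph.predecessors(tag):
            prevScore = tagGraph.node[prevTag].get('score', defaultScore)
            candidate = prevScore + tagGraph.edge[prevTag][tag]['edgeScore']
            if 'score' not in tagGraph.node[tag] or candidate > tagGraph.node[tag]['score']:
                tagGraph.node[tag]['score'] = candidate
            if tagGraph.node[tag]['position'] == 'end' and tagGraph.node[tag]['score'] > maxScore:
                maxScore, maxScoringTag = tagGraph.node[tag]['score'], tag
    return maxScore, maxScoringTag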
Ejemplo n.º 38
0
def process_and_visualize(train_folder="data/train",
                          test_folder="data/test",
                          extra_folder="data/extra",
                          display="",
                          single=False):
    if not (os.path.exists("data/train") or os.path.exists("data/test")
            or os.path.exists("data/extra")):
        if not (os.path.exists("data/train.tar.gz")
                or os.path.exists("data/test.tar.gz")
                or os.path.exists("data/extra.tar.gz")):
            # No tar.gz files found, data must be downloaded
            tr, t, e = download_data()
        # no folders found, need to extract the tar.gz
        train_folder, test_folder, extra_folder = extract_data(tr, t, e)

    ##
    # Set sequence lengths to 0 so multiple runs do not add to old run totals
    Analytics.load()
    Analytics.sequence_lengths = {
        'data/train': {
            1: 0,
            2: 0,
            3: 0,
            4: 0,
            5: 0,
            6: 0
        },
        'data/extra': {
            1: 0,
            2: 0,
            3: 0,
            4: 0,
            5: 0,
            6: 0
        },
        'data/test': {
            1: 0,
            2: 0,
            3: 0,
            4: 0,
            5: 0,
            6: 0
        }
    }
    Analytics.save()

    ##
    # Set the number of data points to create from each source image.
    data_points = 5
    if single:
        data_points = 1
    ##
    # Get the DigitStructs for Training Data.
    fin = os.path.join(train_folder, 'digitStruct.mat')
    dsf = DigitStructFile(fin)
    print("Parsing the training data from the digitStruct.mat file")
    train_data = dsf.get_all_digit_structure_by_digit()
    print("Parsed training data")

    ##
    # Display training examples.
    if display != "":
        print("Displaying Examples with bounding boxes")
        example_indeces = np.random.randint(0, len(train_data), size=5)
        examples = train_data[example_indeces]
        Analytics.load()
        Analytics.train_samples = examples
        Analytics.save()
        for e in examples:
            display_example(e, train_folder)
    ##
    # Preprocess Training data and fetch labels.
    print("Generating data set and processing data.")
    train_dataset, train_labels = generate_dataset(train_data, train_folder,
                                                   single)
    ##
    # Display the processed data.
    if display != "":
        print("Displaying examples of preprocessed images")
        examples = train_dataset[example_indeces]
        labels = train_labels[example_indeces]
        for e, l in zip(examples, labels):
            print("The Label for this is:", l)
            display_processed_example(e)
    ##
    #Delete things to free up space.
    if display != "":
        del example_indeces
        del examples
        del labels
    del train_data

    ##
    # Split the data into training and validation data.
    (train_dataset, train_labels, valid_dataset,
     valid_labels) = split(train_dataset, train_labels,
                           TRAIN_SPLIT * data_points)
    ##
    # Save the split data to disk.
    np.save("temp_train_dataset1", train_dataset)
    np.save("temp_train_labels1", train_labels)
    np.save("temp_valid_dataset1", valid_dataset)
    np.save("temp_valid_labels1", valid_labels)
    ##
    # Delete data to free up space.
    del train_dataset
    del train_labels
    del valid_dataset
    del valid_labels

    ##
    #Repeat the process for Extra Data.
    fin = os.path.join(extra_folder, 'digitStruct.mat')
    dsf = DigitStructFile(fin)
    print("Parsing the extra data from the digitStruct.mat file")
    extra_data = dsf.get_all_digit_structure_by_digit()
    print("Parsed extra data")
    ##
    # Preprocess extra data and fetch labels
    print("Generating data set and processing data.")
    extra_dataset, extra_labels = generate_dataset(extra_data, extra_folder,
                                                   single)
    ##
    # Delete data to free space.
    del extra_data

    ##
    # Split the data into training and validation sets.
    (train_dataset, train_labels, valid_dataset,
     valid_labels) = split(extra_dataset, extra_labels,
                           EXTRA_SPLIT * data_points)
    ##
    # Save the split data onto disk.
    np.save("temp_train_dataset2", train_dataset)
    np.save("temp_train_labels2", train_labels)
    np.save("temp_valid_dataset2", valid_dataset)
    np.save("temp_valid_labels2", valid_labels)

    ##
    # Delete datasets to free up space.
    del train_dataset
    del train_labels
    del valid_dataset
    del valid_labels
    del extra_dataset
    del extra_labels

    ##
    # Create the Training and Validation sets.
    print("Creating the Training and Validation sets")
    td1 = np.load("temp_train_dataset1.npy")
    td2 = np.load("temp_train_dataset2.npy")
    train_dataset = np.concatenate((td1, td2), axis=0)
    del td1
    del td2
    print(train_dataset.shape)
    ##
    # Find the mean and standard deviation of the training set.
    mean = np.mean(train_dataset)
    std = np.std(train_dataset)

    ##
    # Save the stats about the training set.
    Analytics.load()
    Analytics.mean = mean
    Analytics.std = std
    Analytics.data_set_size['train'] = train_dataset.shape[0]
    Analytics.save()

    ##
    # Normalize the training data.
    train_dataset = normalize(train_dataset, mean, std)
    np.save("data/train_dataset", train_dataset)
    del train_dataset

    ##
    # Create the training labels.
    tl1 = np.load("temp_train_labels1.npy")
    tl2 = np.load("temp_train_labels2.npy")
    train_labels = np.concatenate((tl1, tl2), axis=0)
    del tl1
    del tl2
    np.save("data/train_labels", train_labels)
    del train_labels

    ##
    # Create the validation set
    vd1 = np.load("temp_valid_dataset1.npy")
    vd2 = np.load("temp_valid_dataset2.npy")
    valid_dataset = np.concatenate((vd1, vd2), axis=0)
    del vd1
    del vd2
    ##
    # Normalize the validation dataset with the mean and standard deviation
    # of the training set.
    valid_dataset = normalize(valid_dataset, mean, std)
    np.save("data/valid_dataset", valid_dataset)
    Analytics.load()
    Analytics.data_set_size['valid'] = valid_dataset.shape[0]
    Analytics.save()
    print(valid_dataset.shape)
    del valid_dataset

    ##
    # Create the Validation labels.
    vl1 = np.load("temp_valid_labels1.npy")
    vl2 = np.load("temp_valid_labels2.npy")
    valid_labels = np.concatenate((vl1, vl2), axis=0)
    del vl1
    del vl2
    np.save("data/valid_labels", valid_labels)
    del valid_labels
    print("Finished creating the sets")
    #os.remove("temp_train_dataset1.npy")
    #os.remove("temp_train_dataset2.npy")
    #os.remove("temp_train_labels1.npy")
    #os.remove("temp_train_labels2.npy")
    #os.remove("temp_valid_dataset1.npy")
    #os.remove("temp_valid_dataset2.npy")
    #os.remove("temp_valid_labels1.npy")
    #os.remove("temp_valid_labels2.npy")

    # Create the Test data set
    fin = os.path.join(test_folder, 'digitStruct.mat')
    dsf = DigitStructFile(fin)
    print("Parsing the test data from the digitStruct.mat file")
    test_data = dsf.get_all_digit_structure_by_digit()
    print("Parsed test data")

    if display != "":
        example_indeces = np.random.randint(0, len(test_data), size=5)
        examples = test_data[example_indeces]
        Analytics.load()
        Analytics.test_samples = examples
        Analytics.save()

    # Preprocess test data and fetch labels
    print("Generating data set and processing data.")
    test_dataset, test_labels = generate_dataset(test_data, test_folder,
                                                 single)

    ##
    # Normalize the test dataset with the mean and standard deviation
    # from the training set.
    test_dataset = normalize(test_dataset, mean, std)
    np.save("data/test_dataset", test_dataset)
    np.save("data/test_labels", test_labels)
    ##
    # Delete to free space
    del test_data
    del fin
    del dsf
    del test_dataset
    del test_labels

    ##
    # Display and save dataset statistics.
    Analytics.display()
    Analytics.save()
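
The pipeline above also leans on a normalize() helper that is not shown. A minimal sketch, assuming plain standardization with the training-set statistics computed earlier in the function:

import numpy as np

def normalize(dataset, mean, std):
    # Hypothetical helper assumed by process_and_visualize(): standardize the
    # pixel data with the training set's mean and standard deviation.
    return (dataset - mean) / std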
Ejemplo n.º 39
0
    if not options.dtaDir or not options.model or not options.config:
        print 'ERROR: missing model, config, or dtaDir'
        exit(-1)
    
    Constants.aminoacids['C'] = (Constants.aminoacids['C'][0], Constants.aminoacids['C'][1], Constants.aminoacids['C'][2] + Constants.mods['Carbamidomethyl'], Constants.aminoacids['C'][3])
    Constants.aminoacids['O'] = (Constants.aminoacids['M'][0], Constants.aminoacids['M'][1] + 'O', Constants.aminoacids['M'][2] + Constants.mods['#'], Constants.aminoacids['M'][3])
    if options.Cmod == Constants.mods['*']:
        Constants.aminoacids['X'] = (Constants.aminoacids['K'][0], Constants.aminoacids['K'][1], Constants.aminoacids['K'][2] + Constants.mods['*'], Constants.aminoacids['K'][3])
    
    PNet = PN.ProbNetwork(options.config, options.model)
    if options.verbose:
        t1 = time.time()
        print 'Getting heavy-light pairs'

    dtaList = glob.glob(options.dtaDir + '/*.dta')
    (paired, unpaired) = Analytics.getPairedAndUnpairedSpectra(options.dtaDir, dtaList, delta=(options.Nmod + options.Cmod), ppm=options.ppm, cutOff=options.pairCutoff)
    if options.verbose:
        t2 = time.time()
        print 'Finished getting paired spectra. Time taken: ', t2 - t1
        print 'Starting Sequencing'
    
    aas = Constants.addPepsToAADict(options.minEdge)
    for pair in paired:
        (lightSpec, heavySpec) = pair[1:]
        if options.verbose:
            print 'Now sequencing %s %s with shared peaks ratio %f' % (lightSpec, heavySpec, pair[0])
            s1 = time.time()
            
        heavyPath = heavySpec
        lightPath = lightSpec
        sharedInfo = DNS.getPairedSpectraInfoForSequencing(lightPath, heavyPath, options.verbose)
    with open(options.unimoddict) as fin:
        unimodDict = pickle.load(fin)
    hashedUnimodDict = hashUnimodDict(unimodDict)

    outFile = open(options.output, 'w')
    cols = ['ScanF', 'Score', 'Peptide', 'Unmod Peptide', 'References', 'Modifications', 'DB Peptide', 'Alignment Score']
    if 'Ambig Edges' in infoDict:
        cols.insert(2, 'Ambig Edges')
        
    outFile.write('\t'.join([col for col in cols]) + '\n')

    for entry in DataFile.getScanInfo(options.comp, delimiter='\t'):
        scanData = {}
        scanData['ScanF'] = entry[infoDict['ScanF']]
        scanData['Peptide'] = entry[infoDict['Peptide']]
        scanData['Unmod Peptide'] = An.stripModifications(scanData['Peptide'], noRemove=[])
        scanData['Score'] = entry[infoDict['Score']]
        scanData['Alignment Score'] = None
        
        if 'Ambig Edges' in infoDict:
            ambigEdges = eval(entry[infoDict['Ambig Edges']])
            scanData['Ambig Edges'] = ambigEdges
        else:
            ambigEdges = []
        deNovoPRMLadder = An.getPRMLadder(scanData['Peptide'], ambigEdges=ambigEdges)
                
        refList = eval(entry[infoDict['References']])
        subjSequence = getSequence(options.fasta, refList[0][0])[refList[0][1]-1:refList[0][2]]

        if scanData['Unmod Peptide'] == subjSequence:
            scanData['Modifications'] = []
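
The loop above calls a getSequence() helper to pull the subject protein from the FASTA database; the helper itself is not shown. A minimal sketch under the assumption that refList[0][0] is a reference name and the file is plain FASTA (a real implementation would likely parse and cache the file once):

def getSequence(fastaFile, refName):
    # Hypothetical helper assumed above: return the full sequence whose FASTA
    # header begins with refName.
    seq, inRecord = [], False
    with open(fastaFile) as fin:
        for line in fin:
            if line.startswith('>'):
                inRecord = line[1:].strip().startswith(refName)
            elif inRecord:
                seq.append(line.strip())
    return ''.join(seq)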
Ejemplo n.º 41
0
class GameBot:
    browser = None
    executable_path = "source/webdriver/chromedriver"
    logger = Logger.Logger("Logger")
    raid_analytics = Analytics.Analytics()

    def __init__(self, url, username, password, lang="en"):
        self.url = url
        self.username = username
        self.password = password
        self.raid_text = self.get_raid_text(lang)

    def start(self):
        self.browser = webdriver.Chrome(executable_path=self.executable_path)
        self.browser.get(self.url)
        sleep(1)

    def login(self):
        self.random_sleep(0, 300)
        self.browser.find_element_by_name("name").send_keys(self.username)
        self.browser.find_element_by_name("password").send_keys(self.password)
        self.browser.find_element_by_name("s1").click()  # Submit form
        self.logger.add_line("Logging into: " + self.username)
        sleep(1)

    def enter_top_players(self):
        self.browser.get(self.url + "statistiken.php?id=0&idSub=3")
        self.logger.add_line("entering top players")
        sleep(1)

    def grab_raider_table(self):
        # raiders table
        table = self.browser.find_element_by_id("top10_raiders")
        # take all table contents
        rows = table.find_elements_by_tag_name("tr")
        # get the columns of the last row (which holds the current user's info)
        col = rows[-1].find_elements_by_tag_name("td")
        # submit it in database
        self.raid_analytics.add_info(col)
        '''
        for row in rows:
            col = row.find_elements_by_tag_name("td")
            self.raid_analytics.add_info(col)
        '''

    def record_raider_rank(self):
        self.enter_top_players()
        self.grab_raider_table()

    def enter_village(self):
        self.browser.get(self.url + "dorf2.php")
        self.logger.add_line("entering village")  # do something
        sleep(1)

    def enter_rally_point(self):
        self.browser.find_element_by_class_name("g16").click()
        self.logger.add_line("entering rally point")  # do something
        sleep(1)

    def enter_farm_list(self):
        self.browser.find_element_by_class_name("favorKey99").click()
        self.logger.add_line("entering farm list")
        sleep(1)

    def send_farm_list(self, index):
        self.random_sleep(1, 60)
        checkboxes = self.browser.find_elements_by_xpath("//input[contains(@class, 'markAll') "
                                                         "and contains(@class, 'check')]")

        if len(checkboxes) <= index:
            self.logger.add_line("Index is out of checkboxes bounds: " + str(len(checkboxes)))
            return

        checkboxes[index].send_keys(Keys.SPACE)
        buttons = self.browser.find_elements_by_xpath("//button[contains(text(),'" + self.raid_text + "')]")
        if len(buttons) <= index:
            self.logger.add_line("Index is out of button bounds: " + str(len(buttons)))
            return

        buttons[index].click()
        self.logger.add_line("Sent attack on index: " + str(index))
        sleep(2)

    def send_attacks(self, array):
        self.enter_village()
        self.random_sleep(0, 600)
        self.enter_rally_point()
        self.enter_farm_list()
        for i in array:
            self.send_farm_list(int(i))

    def submit(self):
        self.logger.submit()

    def submit_error(self):
        self.logger.add_line("There was an exception during runtime")
        self.logger.submit()

    def random_sleep(self, min_num, max_num):
        if not release:
            return
        sleep_time = random.randint(min_num, max_num)
        self.logger.add_line("sleeping for extra " + str(sleep_time) + " seconds")
        sleep(sleep_time)

    @staticmethod
    def get_raid_text(lang):
        if lang == 'en':
            return "Start raid"
        elif lang == "he":
            return "שלח בזיזה"
def alignDeNovoToDBSequence(deNovoPeptWithMods, deNovoPept, dbPept, hashedUnimodDict, unimodDict, paramsDict, deNovoAmbigEdges = None, tagLength=2, isobaricPenalty=-0.5, defModPenalty=-1, inDelPenalty=-2, undefModPenalty=-3, defaultScore=0):
    deNovoPRMLadder = An.getPRMLadder(deNovoPeptWithMods, ambigEdges = deNovoAmbigEdges, addEnds=True)
    #print deNovoPRMLadder

    print 'De Novo', deNovoPept
    print 'DB', dbPept
    
    dbPRMLadder = An.getPRMLadder(dbPept, addEnds=True)

    startTags, endTags = generateStartAndEndTags(deNovoPept, dbPept)
    sequenceTags = generateSequenceTags(deNovoPept, dbPept, tagLength=tagLength)

    tagGraph = getSequenceTagGraph(startTags, endTags, sequenceTags)

    maxScore = None
    maxScoringTag = None
    
    #print sorted(tagGraph.nodes(data=True))
    #print sorted(tagGraph.edges(data=True))
    for tag in nx.topological_sort(tagGraph):
        nodeScore = tag[0][1] - tag[0][0]
        #print 'Tag', tag
        for prevTag in tagGraph.predecessors(tag):
            nModSymbol = None
            # Define terminus of peptide for modification annotation
            if tagGraph.node[prevTag]['position'] == 'start':
                term = 'N-term'
            elif tagGraph.node[tag]['position'] == 'end':
                term = 'C-term'
            else:
                term = None

            
            refMass = dbPRMLadder[tag[1][0]] - dbPRMLadder[prevTag[1][1]]
            deNovoMass = deNovoPRMLadder[tag[0][0]] - deNovoPRMLadder[prevTag[0][1]]
            refSubSeq = dbPept[prevTag[1][1]:tag[1][0]]
            deNovoSubSeq = deNovoPept[prevTag[0][1]:tag[0][0]]

            mods = resolveInterval(refMass, deNovoMass, refSubSeq, deNovoSubSeq, hashedUnimodDict, unimodDict, paramsDict, term=term, nModSymbol=nModSymbol)
            modPenalty = defModPenalty
            for mod in mods:
                if 'Isobaric Substitution' == mod[0]:
                    modPenalty = isobaricPenalty
                elif 'Insertion' == mod[0] or 'Deletion' == mod[0]:
                    modPenalty = inDelPenalty
                elif 'Undefined Mass Shift' == mod[0]:
                    modPenalty = undefModPenalty
            if not mods:
                modPenalty = 0

            tagGraph.edge[prevTag][tag]['edgeScore'] = nodeScore + modPenalty
            tagGraph.edge[prevTag][tag]['mods'] = mods

            print prevTag, tag, deNovoSubSeq, refSubSeq, mods
            
            if 'score' not in tagGraph.node[prevTag]:
                tagGraph.node[prevTag]['score'] = defaultScore

            try:
                tagGraph.node[tag]['score'] = max(tagGraph.node[tag]['score'], tagGraph.node[prevTag]['score'] + nodeScore + modPenalty)
            except KeyError:
                tagGraph.node[tag]['score'] = tagGraph.node[prevTag]['score'] + nodeScore + modPenalty

            if tagGraph.node[tag]['position'] == 'end' and tagGraph.node[tag]['score'] > maxScore:
                maxScore = tagGraph.node[tag]['score']
                maxScoringTag = tag

    if maxScoringTag != None:
        return getBestAlignment(tagGraph, dbPept, maxScore, maxScoringTag)
    else:
        return None, None, None
Ejemplo n.º 43
0
def get_line():
    global AGENT
    return Analytics.get_fitted_line(AGENT)
Ejemplo n.º 44
0
 
with open(options.symbolmap, 'r') as fin:
    symbolMap = pickle.load(fin)
seqMap = DataFile.generateSeqMap(progDict, symbolMap, paramsDict)

if hasattr(options, 'number'):
    minNumScans = int(options.number)
else:
    minNumScans = 1

processedInfo = {}
if options.lads:
    LADSdict = eval(options.lads)
    for tdvfile in LADSdict.keys():
        LADSScanInfo = DataFile.getScanInfo(tdvfile, dbDict['LADS']['fields'], delimiter='\t')
        processedInfo[LADSdict[tdvfile]] = An.preprocessLADSScanInfo(LADSScanInfo, seqMap[LADSdict[tdvfile]], paramsDict['LADS Parameters']['pair configurations'], dbDict['LADS']['fieldmap'])

if options.mascot:
    MASCOTdict = eval(options.mascot)
    processedInfo.update(parseDBScans(MASCOTdict, 'MASCOT', seqMap, dbDict))

if options.sequest:
    SEQUESTdict = eval(options.sequest)
    processedInfo.update(parseDBScans(SEQUESTdict, 'SEQUEST', seqMap, dbDict))

cols = ['ScanF']
progNames = processedInfo.keys()
cols.extend([val for val in dbDict[progDict[progNames[0]]]['cols']])

outFile = open(options.output, 'w')
outFile.write(','.join([col for col in cols]) + '\n')
Ejemplo n.º 45
0
def get_stats():
    global AGENT
    return Analytics.get_stats(AGENT)
    outFile.write('\t'.join([col for col in cols]) + '\n')

    for seqEntry in LADSSeqInfo:
        lightScans = seqEntry[0]
        heavyScans = seqEntry[1]

        scanScoreDict = getScanScoreDictSVM(LADSSeqInfo, seqEntry, scanFDict, svmModel, svmRange, pairConfigurations[pairConfigName], PNet, desired_feats = desired_feats)
        
#        scanScoreDict = getScanScoreDictRankBoost(LADSSeqInfo, seqEntry, scanFDict, rankModel, pairConfigurations['lightdimethyl_heavydimethyl'], PNet)
#        scanScoreDict = getScanScoreDictClusterNormScore(LADSSeqInfo, seqEntry)

        for i, scan in enumerate(lightScans):

            scanData = {'ScanF': scan}
                        
            lightSeq = An.preprocessSequence(scanScoreDict[scan]['Seq'][0], seqMap, ambigEdges=scanScoreDict[scan]['Seq'][1])
            scanData['LADS Sequence'] = lightSeq
            scanData['LADS Ambig Edges'] = scanScoreDict[scan]['Seq'][1]
            scanData['LADS Raw Score'] = scanScoreDict[scan]['Raw Score']
            scanData['LADS Post Score'] = scanScoreDict[scan]['Post Score']
            scanData['M+H'] = scanFDict[scan]['precMass']

            try:
                comp = An.comparePeptideResults(lightSeq, SEQUESTMASCOTResults[scan]['Peptide'], ambigEdges1=scanScoreDict[scan]['Seq'][1], ambigEdges2=[], ppm=20)            
                scanData['SEQUEST XCorr'] = SEQUESTMASCOTResults[scan]['SEQUEST XCorr']
                scanData['MASCOT Ion Score'] = SEQUESTMASCOTResults[scan]['MASCOT Ion Score']
                scanData['SEQUEST MASCOT Sequence'] = SEQUESTMASCOTResults[scan]['Peptide']
                scanData['Accuracy'] = comp[0]
                scanData['Precision'] = comp[1]
            except KeyError:
                scanData['SEQUEST XCorr'] = None
Ejemplo n.º 47
0
def main():
    Analytics.load()
    #Analytics.save()
    if not (os.path.exists("train_dataset.npy")
            or os.path.exists("train_labels.npy")
            or os.path.exists("valid_dataset.npy")
            or os.path.exists("valid_labels.npy")
            or os.path.exists("test_dataset.npy")
            or os.path.exists("test_labels.npy")):
        print("Data does not exist, downloading now.")
        tr, t, e = download_data()
        train_folder, test_folder, extra_folder = extract_data(tr, t, e)

    # create my network object and the Tensorflow graph for it
    net = Network.Network()
    quit = False
    # Loop options because Tensorflow takes so long to import
    while quit == False:
        print("1. Process The Datasets")
        print("2. Train the model")
        print("3. Display Analytics")
        print("4. Example Use")
        print("5. Use the model")
        print("6. Quit")
        user_input = get_user_input("Select a number: ",
                                    "Please enter a number")
        if user_input == 1:
            print("Press enter to just process the data")
            user_input = raw_input("Press y to visualize it also: ")
            process_and_visualize(display=user_input)
        elif user_input == 2:
            user_input = get_user_input("How many training steps? ",
                                        "Please enter a number")
            net.set_num_steps(int(user_input))
            # Load in the data from the .npy files
            net.load_data()
            net.do_training()
        elif user_input == 3:
            Analytics.display()
        elif user_input == 4:
            try:
                net.load()
            except ValueError:
                print("Failed to load model from ", net.savepath)
                print("Did you train your model yet?")
                exit()
            example_plot(net)
        elif user_input == 5:
            # Restore graph from the savepath file
            try:
                net.load()
            except ValueError:
                print("Failed to load model from ", net.savepath)
                print("Did you train your model yet?")
                exit()
            print("1. Use a Camera.")
            print("2. Use an image file.")
            user_input = get_user_input("Select a number: ",
                                        "Please enter a number")
            if int(user_input) == 1:
                use_camera(net)
            else:
                predict_image(net)
        elif user_input == 6:
            quit = True
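
main() relies on a get_user_input() helper that is not part of the snippet. A minimal sketch matching how it is called (returns an int and re-prompts on bad input), written in the same Python 2 style as the code above:

def get_user_input(prompt, error_message):
    # Hypothetical helper assumed by main(): loop until an integer is entered.
    while True:
        try:
            return int(raw_input(prompt))
        except ValueError:
            print(error_message)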
Ejemplo n.º 48
0
            args.extend(['--lads', '\"' + str(ladsDict) + '\"'])
        else:
            args.extend(['--lads', '\"' + str(ladsDict) + '\"'])
            
        executeProcess(interpreter, 'CompareSearches.py', args, outBase)

    infoMap = DataFile.getDBInfo(options.database, key='infoMap')
    if progDict[options.mainprogname] == 'LADS':
        getPairStats = True
        mainProgFields = ['PScore', 'Num Ambig Edges']
    else:
        getPairStats = False
        mainProgFields = [infoMap[progDict[options.mainprogname]]['Score']]

    if options.denovoscript:
        stats = Analytics.getCompStats(getOutputName(outBase, 'CompareSearches.py', '.tdv'), unitTestName, progDict, infoMap, paramsDict)
    elif options.mainprogname:
        unitTestName = options.mainprogname
        if not options.comp:
            stats = Analytics.getCompStats(getOutputName(outBase, 'CompareSearches.py', '.tdv'), options.mainprogname, progDict, infoMap, paramsDict, getPairStats=getPairStats, mainProgFields=mainProgFields)
        else:
            stats = Analytics.getCompStats(options.comp, options.mainprogname, progDict, infoMap, paramsDict, getPairStats=getPairStats, mainProgFields=mainProgFields)
        
    outFile = open(options.output, 'w')

    outFile.write('\nOverall Comparison Statistics\n')
    writeCategoryInfo(stats, outFile, ['composite'], name='Composite')
    outFile.write('\nTrue Pairs\n')
    if getPairStats:
        for pairConfigName in paramsDict['Pair Configurations']:
            writeCategoryInfo(stats, outFile, ['truepairs', pairConfigName], name='%s True Pairs' % (pairConfigName,))
Ejemplo n.º 49
0
def process_and_visualize(train_folder="train",
                          test_folder="test",
                          extra_folder="extra",
                          display=""):
    if not (os.path.exists("train") or os.path.exists("test")
            or os.path.exists("extra")):
        if not (os.path.exists("train.tar.gz") or os.path.exists("test.tar.gz")
                or os.path.exists("extra.tar.gz")):
            # No tar.gz files found, data must be downloaded
            tr, t, e = download_data()
        # no folders found, need to extract the tar.gz
        train_folder, test_folder, extra_folder = extract_data(tr, t, e)

    # Set sequence lengths to 0 so multiple runs do not add to old run totals
    Analytics.load()
    Analytics.sequence_lengths = {
        'train': {
            1: 0,
            2: 0,
            3: 0,
            4: 0,
            5: 0,
            6: 0
        },
        'extra': {
            1: 0,
            2: 0,
            3: 0,
            4: 0,
            5: 0,
            6: 0
        },
        'test': {
            1: 0,
            2: 0,
            3: 0,
            4: 0,
            5: 0,
            6: 0
        }
    }
    Analytics.save()

    # Get the DigitStructs for Training Data
    fin = os.path.join(train_folder, 'digitStruct.mat')
    dsf = DigitStructFile(fin)
    print("Parsing the training data from the digitStruct.mat file")
    train_data = dsf.get_all_digit_structure_by_digit()
    print("Parsed training data")

    if display != "":
        print("Displaying Examples with bounding boxes")
        example_indeces = np.random.randint(0, len(train_data), size=5)
        examples = train_data[example_indeces]
        Analytics.load()
        Analytics.train_samples = examples
        Analytics.save()
        for e in examples:
            display_example(e, train_folder)

    # Preprocess Training data and fetch labels
    print("Generating data set and processing data.")
    train_dataset, train_labels = generate_dataset(train_data, train_folder)

    if display != "":
        print("Displaying examples of preprocessed images")
        examples = train_dataset[example_indeces]
        labels = train_labels[example_indeces]
        for e, l in zip(examples, labels):
            print("The Label for this is:", l)
            display_processed_example(e)

    #Delete things to free up space
    if display != "":
        del example_indeces
        del examples
        del labels
    del train_data

    #Repeat for Extra Data
    fin = os.path.join(extra_folder, 'digitStruct.mat')
    dsf = DigitStructFile(fin)
    print("Parsing the extra data from the digitStruct.mat file")
    extra_data = dsf.get_all_digit_structure_by_digit()
    print("Parsed extra data")

    # Preprocess extra data and fetch labels
    print("Generating data set and processing data.")
    extra_dataset, extra_labels = generate_dataset(extra_data, extra_folder)

    # Delete to free space
    del extra_data

    # Create the Training and Validation sets
    print("Creating the Training and Validation sets")
    train_dataset, train_labels, valid_dataset, valid_labels = split(
        train_dataset, train_labels, extra_dataset, extra_labels)
    print("Finished creating the sets")

    # Delete to free space
    del extra_dataset
    del extra_labels

    # Create the Test data set
    fin = os.path.join(test_folder, 'digitStruct.mat')
    dsf = DigitStructFile(fin)
    print("Parsing the test data from the digitStruct.mat file")
    test_data = dsf.get_all_digit_structure_by_digit()
    print("Parsed test data")

    if display != "":
        example_indeces = np.random.randint(0, len(test_data), size=5)
        examples = test_data[example_indeces]
        Analytics.load()
        Analytics.test_samples = examples
        Analytics.save()

    # Preprocess test data and fetch labels
    print("Generating data set and processing data.")
    test_dataset, test_labels = generate_dataset(test_data, test_folder)

    # Delete to free space
    del test_data
    del fin
    del dsf

    #Save Data to files
    save(train_dataset, train_labels, valid_dataset, valid_labels,
         test_dataset, test_labels)

    del train_dataset
    del train_labels
    del valid_dataset
    del valid_labels
    del test_dataset
    del test_labels

    Analytics.display()
    Analytics.save()
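
This earlier version of the pipeline ends with a save() helper that is not shown. A minimal sketch, assuming it simply writes the six arrays as the .npy files that main() earlier checks for; the actual filenames and layout are assumptions:

import numpy as np

def save(train_dataset, train_labels, valid_dataset, valid_labels,
         test_dataset, test_labels):
    # Hypothetical helper assumed above: persist each array so a later run can
    # load the processed data instead of reprocessing the raw images.
    np.save("train_dataset", train_dataset)
    np.save("train_labels", train_labels)
    np.save("valid_dataset", valid_dataset)
    np.save("valid_labels", valid_labels)
    np.save("test_dataset", test_dataset)
    np.save("test_labels", test_labels)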
Ejemplo n.º 50
0
def get_reward_data():
    global AGENT
    return Analytics.get_reward_data(AGENT)
Ejemplo n.º 51
0
        sys.exit(-1)

    progName = processedInfo.keys()[0]

    outFile.write(
        "Scan information fetched. Total number of scans: %i. Number of scans considered for validation: %i"
        % (len(scanFDict), len(processedInfo[progName]))
    )

    progPairs = {}
    for pairConfigName in paramsDict["Pair Configurations"]:
        progPairs[pairConfigName] = An.findPairsInSearchResults(
            processedInfo[progName],
            dbDict["infoMap"],
            progDict,
            paramsDict["Pair Configurations"][pairConfigName],
            progName=progName,
            isComp=False,
            ppm=options.ppmstd,
        )

    pairs = {}
    times = {}

    t1 = time.time()
    print "Getting Clusters"
    clusterSVMModel = svmutil.svm_load_model(paramsDict["Cluster Configuration"]["model"])
    clusterSVMRanges = svmutil.load_ranges(
        os.path.splitext((paramsDict["Cluster Configuration"]["model"]))[0] + ".range"
    )
Ejemplo n.º 52
0
    O.calcula_energia_poblacion()
    O.fitness()
    if resp == 's' or resp == 'S':
        V.array_elite = O.elite()
    O.crossover()
    O.mutacion()
    V.array_poblacion = O.ruleta()
    if resp == 's' or resp == 'S':
        for eli in range(len(V.array_elite)):
            V.array_poblacion.append(V.array_elite[eli])

    O.calcula_energia_poblacion()

    #A.mayor_menor_promedio(cor)

    A.asigna_mvp(cor)

    #V.array_poblacion = np.random.permutation(V.array_poblacion).tolist()
"""for f in range(P.filas):
    print(V.cromosoma_mvp[0][f])

for f in range(P.filas):
    print(V.cromosoma_mvp[2][f])

print(round(V.cromosoma_mvp[1], 2))

print(V.cromosoma_mvp[3])"""

#A.mostrar_grafica(grafica)

A.mostrar_grilla()
        SEQUESTdict = eval(options.sequest)
        processedInfo.update(CS.parseScans(SEQUESTdict, 'SEQUEST', seqMap, dbDict))
    if options.combined:
        combinedDict = eval(options.combined)
        processedInfo.update(CS.parseScans(combinedDict, 'Combined', seqMap, dbDict, delimiter='\t', seqDelimLen=0))
    
    if len(processedInfo.keys()) > 1:
        print 'ERROR: Can only compare results to one database search output. Exiting...'
        sys.exit(-1)
    
    progName = processedInfo.keys()[0]

    dtaList = glob.glob(options.dtadir + '/*.dta')
    scanFDict = getScanFDict(dtaList)
    
    precMassClusters = An.findSamePrecMassClusters(dtaList, ppm=options.ppmstd)

    clusterOut = open(options.output + '_cluster.txt', 'w')

    
    for cluster in precMassClusters:
        if len(cluster) == 1:
            continue

        specs = []
        for scanF in cluster:
            specs += [DataFile.getMassIntPairs(scanFDict[scanF]['dta'])]

        for i in range(len(cluster)):
            for j in range(i+1, len(cluster)):
                if cluster[i] in processedInfo[progName] and cluster[j] in processedInfo[progName]: