Example #1
 def findKthLargest(self, nums, k):
     """Return the k-th largest element of ``nums``.

     A min-heap capped at ``k`` entries is maintained while scanning the
     input, so once the scan is over its root is the k-th largest value.
     Each insertion costs O(log k) and is done N times, giving O(N log k)
     time and O(k) extra space.

     If ``k`` exceeds ``len(nums)`` the heap never overflows and the
     smallest element is returned. A ``k`` of zero or less empties the heap
     and the final pop raises ``IndexError``.

     :type nums: List[int]
     :type k: int
     :rtype: int
     :returns: the k-th largest value, or 0 when ``nums`` is empty.
     """
     if not nums:
         return 0
     heap = []
     for num in nums:
         heapq.heappush(heap, num)
         # Keep only the k largest values seen so far: once the heap
         # outgrows k, evict its smallest entry.
         if len(heap) > k:
             heapq.heappop(heap)
     # The root of a k-sized min-heap of the largest values is the answer.
     return heapq.heappop(heap)
Example #2
	def find_hits(self, tensor, opts):
		"""Select the best ganglion hits from the tensor of hits.

		~~  Former UI FUNCTION  ~~

		Every innermost entry ``T3`` of the three-level ``tensor`` is a
		candidate when ``T3[1] == self.gang_window()[t1]``, where ``t1`` is the
		entry's index along the first axis. Candidates are ranked by ``T3[0]``
		in descending order (ties fall back to ``T3[1]`` and then the indices,
		ascending).

		``opts`` lets the caller say what they want in a single value:
		  - int:   the top <int> results
		  - str:   an int percentile in % (``'10'`` -> top 10%), or the
		           keyword ``'tenth_percentile'`` (same as ``'10'``)
		  - float: the percentile at face value (``0.1`` -> top 10%)

		Returns a list of tuples ``(T3[0], T3[1], t1, t2, t3)``, best first,
		or None when ``opts`` is none of the supported types.
		"""
		if not isinstance(opts, (int, float, str)):
			return None

		if isinstance(opts, str):
			# The keyword must be handled before the numeric conversion,
			# otherwise float('tenth_percentile') raises ValueError.
			if opts == 'tenth_percentile':
				opts = 0.1
			else:
				opts = float(opts) / 100

		gang_window = self.gang_window()

		# Negate the score so that an ascending sort yields the best hit first.
		ranked = []
		for t1, T1 in enumerate(tensor):
			for t2, T2 in enumerate(T1):
				for t3, T3 in enumerate(T2):
					if T3[1] == gang_window[t1]:
						ranked.append((-T3[0], T3[1], t1, t2, t3))
		ranked.sort()

		# Undo the negation so callers see the real score.
		hits = [(-entry[0],) + entry[1:] for entry in ranked]

		if isinstance(opts, float):
			return hits[0:int(math.ceil(len(hits) * opts))]
		return hits[0:opts]
def astar(start, h, d=None):
    """Run an A* search from ``start`` using the heuristic ``h``.

    ``start`` is wrapped in a ``Node`` and nodes are popped from a heap in
    their own sort order. Each successor gets ``g`` (parent ``g`` + 1),
    ``f = g + h(succ)`` and a ``parent`` link before it is queued.

    :param start: initial state, passed to ``Node``.
    :param h: callable taking a node and returning its heuristic estimate.
    :param d: optional depth used when collecting table data; ignored when
        it is None or below 2.
    :returns: ``[goal_node, count]`` where ``count`` is the number of nodes
        still queued plus the number of expanded nodes plus one, or None
        when the queue runs empty without reaching a goal.
    """
    explored = set()
    frontier = []  # named so the builtin ``open`` is not shadowed
    hq.heappush(frontier, Node(start))

    while frontier:
        current = hq.heappop(frontier)

        # used for getting table data
        # ``d`` defaults to None, which cannot be ordered against an int on
        # Python 3, hence the explicit None guard.
        # NOTE(review): the original had an unreachable ``return None`` after
        # this return; it looks like an ``else`` branch was lost - confirm.
        if d is not None and d >= 2:
            if current.g == d:
                if isGoal(current):
                    return [current, len(frontier) + len(explored) + 1]

        if isGoal(current):
            return [current, len(frontier) + len(explored) + 1]

        # Record the expanded state; without this the membership test below
        # never rejects anything and the node count ignores expanded nodes.
        explored.add(str(current))

        for succ in successors(current):
            if str(succ) not in explored:
                succ.g = current.g + 1
                succ.f = succ.g + h(succ)
                succ.parent = current
                hq.heappush(frontier, succ)
    return None
def astar(start):
    """Run an A* search from ``start`` with unit step costs.

    ``start`` and every state reachable from it must provide
    ``heuristic()``, ``isGoal()``, ``moves()`` (an iterable of
    ``(child, move)`` pairs) and a ``str()`` form that identifies the state.

    :returns: ``(cost, path)`` where ``path`` is the list of moves leading
        from ``start`` to the goal, or None when no goal is reachable.
    """
    frontier = []
    closed = set()
    # Insertion counter stored in every heap entry: it breaks (f, g) ties so
    # that the state objects themselves never have to be compared.
    pushed = 0
    hq.heappush(frontier, [start.heuristic(), 0, pushed, start, []])
    while frontier:
        _f, g, _order, cur, path = hq.heappop(frontier)
        if cur.isGoal():
            return g, path
        key = str(cur)
        if key in closed:
            # Already expanded through an entry that was popped earlier.
            continue
        closed.add(key)
        for child, move in cur.moves():
            if str(child) not in closed:
                pushed += 1
                # Build a fresh list per child so sibling paths stay separate.
                hq.heappush(frontier, [g + 1 + child.heuristic(), g + 1,
                                       pushed, child, path + [move]])
    return None
Example #5
    def _get_next_forwarder(self):
        """
        Get next available forwarder. It will accumulate forwarder load
        and return forwarder with minimal load.

        The entry at the top of the ``self._available_forwarderloads`` heap
        is popped, its ``load`` is incremented and it is pushed back, so the
        heap is re-ordered around the new load.

        :returns: the ``forwarder`` attribute of the popped entry.
        :raises DispatchEngineException: when the heap is empty.
        """
        # NOTE(review): the heap is mutated while holding only the reader
        # side of the lock - confirm that this is intended.
        with self._forwarders_lock.reader_lock:
            try:
                forwarder_load = heapq.heappop(self._available_forwarderloads)
            except IndexError:
                # heappop raises IndexError on an empty heap.
                raise DispatchEngineException("No available forwarders")
            forwarder = forwarder_load.forwarder
            forwarder_load.load += 1
            heapq.heappush(self._available_forwarderloads, forwarder_load)
            return forwarder
def astar(start):
    """Run an A* search from ``start`` with unit step costs.

    ``start`` and every state reachable from it must provide
    ``heuristic()``, ``isGoal()``, ``moves()`` (an iterable of
    ``(child, move)`` pairs) and a ``str()`` form that identifies the state.

    :returns: ``(cost, path)`` where ``path`` is the list of moves leading
        from ``start`` to the goal, or None when no goal is reachable.
    """
    frontier = []
    closed = set()
    # Insertion counter stored in every heap entry: it breaks (f, g) ties so
    # that the state objects themselves never have to be compared.
    pushed = 0
    hq.heappush(frontier, [start.heuristic(), 0, pushed, start, []])
    while frontier:
        _f, g, _order, cur, path = hq.heappop(frontier)
        if cur.isGoal():
            return g, path
        key = str(cur)
        if key in closed:
            # Already expanded through an entry that was popped earlier.
            continue
        closed.add(key)
        for child, move in cur.moves():
            if str(child) not in closed:
                pushed += 1
                # Build a fresh list per child so sibling paths stay separate.
                hq.heappush(frontier, [g + 1 + child.heuristic(), g + 1,
                                       pushed, child, path + [move]])
    return None
Example #7
    def get(self, url, max_last_time):
        """Pick a proxy for the domain of ``url``.

        If ``self._pool`` is not entirely present in ``self._table[domain]``:
            add one of the missing proxies to ``self._table[domain]`` and
            return it.
        Otherwise:
            pop the item with the lowest last_time from the priority queue
            and compare its last proxied time with ``max_last_time``;
            if no proxy is available, put the item back and return None,
            else put the item back with the current time, update
            ``self._table[domain][proxy]`` and return the proxy.

        :param url: URL whose network location selects the per-domain table.
        :param max_last_time: newest last-use timestamp a proxy may have and
            still be handed out again.
        :returns: the chosen proxy, or None when the pool is empty, the
            priority queue is empty, or the least recently used proxy was
            used after ``max_last_time``.
        """
        domain = urlparse.urlparse(url).netloc
        proxies_table = self._table[domain]
        now = time.time()

        if not self._pool:
            # Nothing to hand out at all.
            return None
        rest_proxies = self._pool.difference(set(proxies_table.keys()))

        if len(rest_proxies) == 0:
            # Every pooled proxy has been used for this domain already:
            # recycle the least recently used one if it is old enough.
            try:
                item = heapq.heappop(proxies_table['priority'])
            except IndexError:
                print('priority queue is empty.')
                return None

            last_time, count, proxy = item
            if max_last_time < last_time:
                # Even the oldest entry was used too recently; restore it.
                heapq.heappush(proxies_table['priority'], item)
                return None

            _count = self._count_rule('get', count)
            proxies_table[proxy] = [now, _count]
            heapq.heappush(proxies_table['priority'], [now, _count, proxy])
            return proxy

        # At least one pooled proxy has never been used for this domain.
        count = 0
        _count = self._count_rule('get', count)
        proxy = rest_proxies.pop()
        proxies_table[proxy] = [now, _count]

        if 'priority' not in proxies_table:
            proxies_table['priority'] = []
        heapq.heappush(proxies_table['priority'], [now, _count, proxy])
        return proxy
Example #8
def main():
    parser = OptionParser()  #(usage=usage)
        "Input interaction file. Assumed that the first six fields contain two interacting chromosome information."
        "If 1, indicates that input interaction file has a header line (such as field names). Default 1."
                      help="Output merged interaction file")
                      help="Size of bins employed. DEFAULT 5 Kb.")
                      help="Rule of connectivity ( 8 or 4). DEFAULT 8.")
        "Percentage of elements to be selected from each connected component. Default: 100, means all loops would be considered. If specified as 0, only the most significant loops from each component would be selected. For any number x between 0 and 100, top x% of the loops in a component, considering both statistical significance and contact count, would considered for inclusion, subject to the bin and neighborhood contraints."
        "Positive integer (default: 2 with 5 Kb bin size) means that if a loop is included in the final set, loops involving within 2x2 neighborhood of both the bins would be discarded. Applicable only if --percent > 0. Difference in bin size other than 5000 may prompt user to change this value."
        help="Column number storing the contact count. Default: 7.")
        "Column number storing the q-value (or any measure of statistical significance). Default: 0, means the last column of the given interaction file. Any non-zero value would prompt the user to check the corresponding column."
        "Column number storing the p-value (or any measure of statistical significance). Default: 0, means the second last column of the given interaction file. Any non-zero value would prompt the user to check the corresponding column."
        "Binary variable indicating the sorting order of the given significance values. Default 0, means sorting is done by ascending order. If specified 1, sorting is done by descending (reverse) order."
    (options, args) = parser.parse_args()

    # process the input parameters
    if options.InpFile is not None:
        InpFile = options.InpFile
        sys.exit("Input file is not provided - quit !!")

    # output file storing the bed formatted interactions
    if options.OutFile is not None:
        OutFile = options.OutFile
        sys.exit("Output file is not specified - quit !!")

    bin_size = int(options.binsize)
    headerInp = int(options.headerInp)
    connectivity_rule = int(options.connectivity_rule)
    TopPctElem = int(options.TopPctElem)
    NeighborHoodBinThr = (int(options.NeighborHoodBin)) * bin_size

    # parameters regarding significance and sorting order of statistical significance values
    QValCol = int(options.QValCol)
    PValCol = int(options.PValCol)
    CCCol = int(options.CCCol)
    SortOrder = int(options.SortOrder)

    # fix the columns containing P and Q-values
    # by reading the first line of the input interaction file
    fp_in = open(InpFile, 'r')
    l = fp_in.readline()
    contents = l.rstrip().split()
    if (QValCol == 0):
        QValCol = len(contents)

    if (PValCol == 0):
        PValCol = (len(contents) - 1)


    # print the parameters
    if 1:
        print '****** Merge filtering of adjacent loops is enabled *****'
        print '***** within function of merged filtering - printing the parameters ***'
        print '*** bin_size: ', bin_size
        print '*** headerInp: ', headerInp
        print '*** connectivity_rule: ', connectivity_rule
        print '*** TopPctElem: ', TopPctElem
        print '*** NeighborHoodBinThr: ', NeighborHoodBinThr
        print '*** QValCol: ', QValCol
        print '*** PValCol: ', PValCol
        print '*** SortOrder: ', SortOrder

    # open the output file
    # if input interaction file has header information,
    # then dump the header in the output file as well
    fp_outInt = open(OutFile, 'w')

    if (headerInp == 1):
        fp_in = open(InpFile, 'r')
        l = fp_in.readline()
        contents = l.rstrip().split()
        # write the header corresponding to the chromosomes, contact count, P value and the Q value
        fp_outInt.write(contents[0] + '\t' + contents[1] + '\t' + contents[2] +
                        '\t' + contents[3] + '\t' + contents[4] + '\t' +
                        contents[5] + '\t' + contents[CCCol - 1] + '\t' +
                        contents[PValCol - 1] + '\t' + contents[QValCol - 1] +
                        '\t' + 'bin1_low' + '\t' + 'bin1_high' + '\t' +
                        'bin2_low' + '\t' + 'bin2_high' + '\t' + 'sumCC' +
                        '\t' + 'StrongConn')
        fp_outInt.write('chr1' + '\t' + 'start1' + '\t' + 'end1' + '\t' +
                        'chr2' + '\t' + 'start2' + '\t' + 'end2' + '\t' +
                        'CC' + '\t' + 'p' + '\t' + 'fdr' + '\t' + 'bin1_low' +
                        '\t' + 'bin1_high' + '\t' + 'bin2_low' + '\t' +
                        'bin2_high' + '\t' + 'sumCC' + '\t' + 'StrongConn')

    # list of chromosomes to be experimented
    TargetChrList = []
    for i in range(1, 23):
        curr_chr = 'chr' + str(i)

    # output directory
    OutDir = os.path.dirname(os.path.realpath(OutFile))
    if 1:
        print 'OutDir: ', str(OutDir)

    # loop to process individual chromosomes and corresponding data
    for chridx in range(len(TargetChrList)):
        curr_chr = TargetChrList[chridx]
        if 1:
            print 'Processing the chromosome: ', str(curr_chr)

        # extract the interactions of current chromosome from the complete set of interactions
        tempchrdumpfile = OutDir + '/Temp_chr_Dump.bed'
        if (headerInp == 1):
            awkcmd = "cat " + str(
                InpFile) + " | awk \'{if (NR>1 && $1==\"" + str(
                    curr_chr) + "\" && $4==\"" + str(
                        curr_chr) + "\"){print $0}}\' -  > " + str(
            awkcmd = "cat " + str(InpFile) + " | awk \'{if ($1==\"" + str(
                curr_chr) + "\" && $4==\"" + str(
                    curr_chr) + "\"){print $0}}\' -  > " + str(tempchrdumpfile)

        # check the number of dumped interactions
        num_Int = sum(1 for line in open(tempchrdumpfile))

        if (num_Int == 0):
            if 0:
                print 'Number of interactions for this chromosome = 0 --- continue'

        if 0:
            print 'Extracted interactions for the current chromosome'

        # # extract also the max span of interactions (6th column maximum element)
        # # so as to estimate the matrix size
        # temp_log_file = OutDir + '/Temp.log'
        # sys_cmd = "cat " + str(tempchrdumpfile) + " | cut -f6 | sort -nr  > " + str(temp_log_file)
        # os.system(sys_cmd)

        # # determine the maximum coordinate
        # with open(temp_log_file, 'r') as fp_in:
        #     l = fp_in.readline()
        #     max_coord = int((l.rstrip()).split()[0])

        # sys.stdout.flush()  # test

        # # number of bins (matrix dimension)
        # nbins = (max_coord / bin_size)
        # if 0:
        #     print 'max_coord of the interactions: ', str(max_coord)
        #     print 'nbins: ', str(nbins)

        # create a graph which will store the interactions
        G = nx.Graph()

        # create a dictionary for storing the interactions
        CurrChrDict = dict()

        # now scan through the interactions of the extracted chromosome
        # and create a dictionary whose keys are the interacting bin numbers
        with open(tempchrdumpfile, 'r') as fp:
            for line in fp:
                linecontents = (line.rstrip()).split()
                # we set the bin number according to the end coordinate
                bin1 = int(linecontents[2]) / bin_size
                bin2 = int(linecontents[5]) / bin_size
                if (bin1 < bin2):
                    curr_key = (bin1, bin2)
                    curr_key = (bin2, bin1)
                # assign the key to the dictionary
                # add the contact count, P value and Q value information as well
                    Interaction(int(linecontents[CCCol - 1]),
                                float(linecontents[PValCol - 1]),
                                float(linecontents[QValCol - 1])))
                # add the node to the given graph as well
                if 0:
                    print 'Current interaction: ', str(
                        line), '  ------ curr_key: ', curr_key

        # now check the nodes of G
        # assign edges of G according to the 8 / 4 connectivity rule (according to the input parameter)
        nodelist = list(G.nodes())

        for i in range(len(nodelist) - 1):
            node1 = nodelist[i]
            for j in range(i + 1, len(nodelist)):
                node2 = nodelist[j]
                if 0:
                    print 'Checking the edge between node 1: ', node1, '  and node 2: ', node2
                # check if there should be an edge between node1 and node2
                # according to the desired connectivity rule
                if (connectivity_rule == 8):
                    if (abs(node1[0] - node2[0]) <=
                            1) and (abs(node1[1] - node2[1]) <= 1):
                        G.add_edge(node1, node2)
                        if 0:
                            print '8 connectivity Edge between node 1: ', node1, '  and node 2: ', node2
                if (connectivity_rule == 4):
                    if ((abs(node1[0] - node2[0]) + abs(node1[1] - node2[1]))
                            <= 1):
                        G.add_edge(node1, node2)
                        if 0:
                            print '4 connectivity Edge between node 1: ', node1, '  and node 2: ', node2

        # check the edges of G
        edgelist = list(G.edges())

        if 1:
            print 'No of nodes of G: ', G.number_of_nodes()
            print 'No of edges of G: ', G.number_of_edges()
            print 'Number of connected components of G: ', nx.number_connected_components(

        # scan through individual connected components
        # for each such connected component (list of interactions)
        # we find a representative interaction and print it in the final output file
        list_conn_comp = sorted(nx.connected_components(G),

        if 0:
            print '\n\n**** Number of connected components: ', len(
                list_conn_comp), '  ****\n\n'

        # process individual connected components
        for i in range(len(list_conn_comp)):
            # a connected component - a particular list of connected nodes
            curr_comp_list = list(list_conn_comp[i])
            if 0:
                print '\n\n\n  ===>>>>> Processing the connected component no: ', i, '  list: ', str(
                    curr_comp_list), '  number of elements: ', len(

            # from the first interacting bin set, get the lower and higher bin index
            min_idx_bin1 = min([x[0] for x in curr_comp_list])
            max_idx_bin1 = max([x[0] for x in curr_comp_list])
            if 0:
                print 'min_idx_bin1: ', min_idx_bin1, ' max_idx_bin1: ', max_idx_bin1

            # get the span of coordinates for the first interacting bin (set)
            span_low_bin1 = (min_idx_bin1 - 1) * bin_size
            span_high_bin1 = max_idx_bin1 * bin_size
            if 0:
                print 'span_low_bin1: ', span_low_bin1, ' span_high_bin1: ', span_high_bin1

            # from the second interacting bin set, get the lower and higher bin index
            min_idx_bin2 = min([x[1] for x in curr_comp_list])
            max_idx_bin2 = max([x[1] for x in curr_comp_list])
            if 0:
                print 'min_idx_bin2: ', min_idx_bin2, ' max_idx_bin2: ', max_idx_bin2

            # get the span of coordinates for the second interacting bin (set)
            span_low_bin2 = (min_idx_bin2 - 1) * bin_size
            span_high_bin2 = max_idx_bin2 * bin_size
            if 0:
                print 'span_low_bin2: ', span_low_bin2, ' span_high_bin2: ', span_high_bin2

            # sum of contact counts for all the interacting bins
            # within this set of connected nodes
            sum_cc = sum([CurrChrDict[x]._GetCC() for x in curr_comp_list])

            # now get the percentage of bin pairs within this set of connected component
            # having a significant interaction
            total_possible_bin_pairs = (max_idx_bin1 - min_idx_bin1 +
                                        1) * (max_idx_bin2 - min_idx_bin2 + 1)
            possible_bin_pairs = 0
            for b1 in range(min_idx_bin1, (max_idx_bin1 + 1)):
                for b2 in range(min_idx_bin2, (max_idx_bin2 + 1)):
                    bin_pair_key = (b1, b2)
                    if bin_pair_key in CurrChrDict:
                        possible_bin_pairs = possible_bin_pairs + 1

            # % of bin pairs within the region spanned by this connected component
            # having significant interaction
            # the higher the %, the better this component is strongly connected
            Percent_Significant_BinPair = (possible_bin_pairs *
                                           1.0) / total_possible_bin_pairs

            if 0:
                print ' ==>>> total_possible_bin_pairs: ', total_possible_bin_pairs, ' possible_bin_pairs: ', possible_bin_pairs, ' % clique: ', Percent_Significant_BinPair

            # approach 1 :
            # if TopPctElem = 0 then
            # get the bin having maximum statistical significance and corresponding bin pairs
            # ties are resolved by maximum contact count
            # if SortOrder = 0, maximum statistical significance === min P and Q values
            # if SortOrder = 1, maximum statistical significance === max P and Q (equivalent) measures
            if (TopPctElem == 0):

                for j in range(len(curr_comp_list)):
                    curr_key = curr_comp_list[j]
                    curr_cc = CurrChrDict[curr_key]._GetCC()
                    curr_pval = CurrChrDict[curr_key]._GetPVal()
                    curr_qval = CurrChrDict[curr_key]._GetQVal()

                    curr_key_bin1_mid = (((curr_key[0] - 1) * bin_size) +
                                         (curr_key[0] * bin_size)) / 2
                    curr_key_bin2_mid = (((curr_key[1] - 1) * bin_size) +
                                         (curr_key[1] * bin_size)) / 2
                    if 0:
                        print ' Connected component index: ', j, ' curr_key: ', curr_key, ' bin 1 mid: ', curr_key_bin1_mid, ' bin 2 mid: ', curr_key_bin2_mid, ' CC: ', curr_cc, ' Pval: ', curr_pval, ' Qval: ', curr_qval
                    if (j == 0):
                        # first index
                        rep_bin_key = curr_key
                    elif (SortOrder == 0) and (
                            curr_pval < CurrChrDict[rep_bin_key]._GetPVal()
                    ) and (curr_qval < CurrChrDict[rep_bin_key]._GetQVal()):
                        # current element has higher statistical significance (lower P or Q value when SortOrder = 0)
                        rep_bin_key = curr_key
                    elif (SortOrder == 1) and (
                            curr_pval > CurrChrDict[rep_bin_key]._GetPVal()
                    ) and (curr_qval > CurrChrDict[rep_bin_key]._GetQVal()):
                        # current element has higher statistical significance (higher P or Q value when SortOrder = 1)
                        rep_bin_key = curr_key
                    elif (curr_pval
                          == CurrChrDict[rep_bin_key]._GetPVal()) and (
                              curr_qval == CurrChrDict[rep_bin_key]._GetQVal()
                          ) and (curr_cc > CurrChrDict[rep_bin_key]._GetCC()):
                        # current element has equal P and Q values
                        # but higher contact count
                        rep_bin_key = curr_key

                # fix the representative interaction
                rep_bin1_low = (rep_bin_key[0] - 1) * bin_size
                rep_bin1_high = rep_bin_key[0] * bin_size
                rep_bin2_low = (rep_bin_key[1] - 1) * bin_size
                rep_bin2_high = rep_bin_key[1] * bin_size
                cc = CurrChrDict[rep_bin_key]._GetCC()
                pval = CurrChrDict[rep_bin_key]._GetPVal()
                qval = CurrChrDict[rep_bin_key]._GetQVal()

                if 0:
                    print '**** Selected bin key: ', rep_bin_key, ' start bin mid: ', (
                        rep_bin1_low + rep_bin1_high) / 2, ' end bin mid: ', (
                            rep_bin2_low + rep_bin2_high
                        ) / 2, ' cc: ', cc, ' pval: ', pval, ' qval: ', qval

                # write the interaction in the specified output file
                fp_outInt.write('\n' + str(curr_chr) + '\t' +
                                str(rep_bin1_low) + '\t' + str(rep_bin1_high) +
                                '\t' + str(curr_chr) + '\t' +
                                str(rep_bin2_low) + '\t' + str(rep_bin2_high) +
                                '\t' + str(cc) + '\t' + str(pval) + '\t' +
                                str(qval) + '\t' + str(span_low_bin1) + '\t' +
                                str(span_high_bin1) + '\t' +
                                str(span_low_bin2) + '\t' +
                                str(span_high_bin2) + '\t' + str(sum_cc) +
                                '\t' + str(Percent_Significant_BinPair))

            # approach 2:
            # if TopPctElem > 0, and TopPctElem < 100
            # then get the top (TopPctElem %) elements from
            # each connected component, (P and Q values, contact counts)
            # use these elements if they satisfy the bin neighborhood threshold
            if (TopPctElem > 0) and (TopPctElem < 100):

                # list to store the q-values and the CC values for individual bin pairs
                # for their sequential extraction
                Curr_Comp_Tuple_List = []

                # lists storing different attributes
                curr_conn_comp_CCList = []
                curr_conn_comp_QValList = []

                # process individual elements within this connected component
                for j in range(len(curr_comp_list)):
                    curr_key = curr_comp_list[j]
                    curr_cc = CurrChrDict[curr_key]._GetCC()
                    curr_pval = CurrChrDict[curr_key]._GetPVal()
                    curr_qval = CurrChrDict[curr_key]._GetQVal()

                    # create a min-heap structure
                    # first element: q-value
                    # if SortOrder = 0, lower: better: use the same sign when inserting in the heap
                    # if SortOrder = 1, higher: better: reverse the sign when inserting in the heap
                    # for ties, second element (contact count) - higher: better
                    # so, use negative signs
                    if (SortOrder == 0):
                        subl = [
                            curr_qval, ((-1) * curr_cc), curr_key[0],
                        subl = [((-1) * curr_qval), ((-1) * curr_cc),
                                curr_key[0], curr_key[1]]

                    # insert the element in the designated queue
                    heapq.heappush(Curr_Comp_Tuple_List, subl)

                # first get the maximum / minimum values from these lists
                max_cc = max(curr_conn_comp_CCList)
                min_qval = min(curr_conn_comp_QValList)

                # now obtain the values of top K % elements
                # from these lists
                # where K = 50 means it is median
                custom_cc = custom_percent(curr_conn_comp_CCList, TopPctElem,
                custom_qval = custom_percent(curr_conn_comp_QValList,
                                             TopPctElem, (SortOrder + 1))

                if 0:
                    print ' --> current connected component: max CC: ', max_cc, '  min Q val: ', min_qval, '  top K (TopPctElem): ', TopPctElem, '  custom_cc threshold: ', custom_cc, '  custom_qval threshold: ', custom_qval

                # this list stores the candidate interactions
                # from this particular connected component
                # that will be used in the final set of interactions
                Final_Rep_Key_List = []

                # now extract elements from the constructed queue
                while (len(Curr_Comp_Tuple_List) > 0):
                    curr_elem = heapq.heappop(Curr_Comp_Tuple_List)
                    if 0:
                        print 'extracted element from heap: ', curr_elem

                    # earlier condition - 1 - sourya
                    # consider only those interactions
                    # which have sufficient values of both contact count
                    # and q-values

                    # # terminating condition - do not consider elements
                    # # with lower log 10 Q values than the custom_logqval
                    # if ((curr_elem[0] * (-1)) < custom_logqval):
                    #     break

                    # # coninue if the contact count falls below the designated threshold
                    # if ((curr_elem[1] * (-1)) < custom_cc):
                    #     continue
                    # modified condition - sourya
                    # consider those interactions having
                    # significance value > K percentile
                    if ((SortOrder == 0) and (curr_elem[0] > custom_qval)) or (
                        (SortOrder == 1) and (curr_elem[0] < custom_qval)):

                    # if this is the first element
                    # then insert the key in the candidate set of interactions
                    if (len(Final_Rep_Key_List) == 0):
                        subl = [curr_elem[2], curr_elem[3]]
                        if 0:
                            print '\t\t *** inserted element in the final list: ', str(
                                subl), '  generated Final_Rep_Key_List: ', str(

                    # otherwise, check with the existing interactions
                    # and do not include if the bin falls within a certain
                    # neighborhood of earlier included interactions
                    # the neighborhood is already mentioned via command line parameters
                    flag = False
                    for i in range(len(Final_Rep_Key_List)):
                        # both ends of the bins should be within neighborhood thresholds
                        # of existing contacts
                        if (((abs(Final_Rep_Key_List[i][0] - curr_elem[2])) *
                             bin_size) <= NeighborHoodBinThr) and ((
                                 (abs(Final_Rep_Key_List[i][1] - curr_elem[3]))
                                 * bin_size) <= NeighborHoodBinThr):
                            flag = True
                            if 0:
                                print ' --- current element is within neighborhood of the bins indexed by ', i, '  of Final_Rep_Key_List'

                    if (flag == False):
                        # there is no such neighborhood constraints
                        # include the bin
                        subl = [curr_elem[2], curr_elem[3]]
                        if 0:
                            print '\t\t *** inserted element in the final list: ', str(
                                subl), '  generated Final_Rep_Key_List: ', str(

                # now print the candidate interactions
                # of the current component
                for i in range(len(Final_Rep_Key_List)):
                    rep_bin_key = (Final_Rep_Key_List[i][0],

                    # fix the representative interaction
                    rep_bin1_low = (rep_bin_key[0] - 1) * bin_size
                    rep_bin1_high = rep_bin_key[0] * bin_size
                    rep_bin2_low = (rep_bin_key[1] - 1) * bin_size
                    rep_bin2_high = rep_bin_key[1] * bin_size
                    cc = CurrChrDict[rep_bin_key]._GetCC()
                    pval = CurrChrDict[rep_bin_key]._GetPVal()
                    qval = CurrChrDict[rep_bin_key]._GetQVal()

                    if 1:
                        print '**** Selected bin key: ', rep_bin_key, ' start bin mid: ', (
                            rep_bin1_low + rep_bin1_high
                        ) / 2, ' end bin mid: ', (
                            rep_bin2_low + rep_bin2_high
                        ) / 2, ' cc: ', cc, ' pval: ', pval, ' qval: ', qval

                    # write the interaction in the specified output file
                    fp_outInt.write('\n' + str(curr_chr) + '\t' +
                                    str(rep_bin1_low) + '\t' +
                                    str(rep_bin1_high) + '\t' + str(curr_chr) +
                                    '\t' + str(rep_bin2_low) + '\t' +
                                    str(rep_bin2_high) + '\t' + str(cc) +
                                    '\t' + str(pval) + '\t' + str(qval) +
                                    '\t' + str(span_low_bin1) + '\t' +
                                    str(span_high_bin1) + '\t' +
                                    str(span_low_bin2) + '\t' +
                                    str(span_high_bin2) + '\t' + str(sum_cc) +
                                    '\t' + str(Percent_Significant_BinPair))

            # approach 3:
            # if TopPctElem = 100 (latest implementation)
            # then sequentially obtain the interactions with the lowest q-value
            # and break the tie for the higher contact count
            # use these elements if they satisfy the bin neighborhood threshold
            if (TopPctElem == 100):

                # list to store the q-values and the CC values for individual bin pairs
                # for their sequential extraction
                Curr_Comp_Tuple_List = []

                # lists storing different attributes
                curr_conn_comp_CCList = []
                curr_conn_comp_QValList = []

                # process individual elements within this connected component
                for j in range(len(curr_comp_list)):
                    curr_key = curr_comp_list[j]
                    curr_cc = CurrChrDict[curr_key]._GetCC()
                    curr_qval = CurrChrDict[curr_key]._GetQVal()

                    # create a min-heap structure
                    # first element: q-value
                    # if SortOrder = 0, lower: better: use the same sign when inserting in the heap
                    # if SortOrder = 1, higher: better: reverse the sign when inserting in the heap
                    # for ties, second element (contact count) - higher: better
                    # so, use negative signs
                    if (SortOrder == 0):
                        subl = [
                            curr_qval, ((-1) * curr_cc), curr_key[0],
                        subl = [((-1) * curr_qval), ((-1) * curr_cc),
                                curr_key[0], curr_key[1]]

                    # insert the element in the designated queue (min-heap property)
                    heapq.heappush(Curr_Comp_Tuple_List, subl)

                # this list stores the candidate interactions
                # from this particular connected component
                # that will be used in the final set of interactions
                Final_Rep_Key_List = []

                if 0:
                    print ' **** Processing the connected component ===== number of elements: ', len(

                # now extract elements from the constructed queue
                while (len(Curr_Comp_Tuple_List) > 0):
                    # extract the first element from the min-heap
                    # element with the lowest value
                    curr_elem = heapq.heappop(Curr_Comp_Tuple_List)
                    if 0:
                        print 'extracted element from heap: ', curr_elem

                    # if this is the first element
                    # then insert the key in the candidate set of interactions
                    if (len(Final_Rep_Key_List) == 0):
                        subl = [curr_elem[2], curr_elem[3]]
                        if 0:
                            print '*** inserted element in the final list: ', str(

                    # otherwise, check with the existing interactions
                    # and do not include if the bin falls within a certain
                    # neighborhood of earlier included interactions
                    # the neighborhood is already mentioned via command line parameters
                    flag = False
                    for i in range(len(Final_Rep_Key_List)):
                        # both ends of the bins should be within neighborhood thresholds
                        # of existing contacts
                        if (((abs(Final_Rep_Key_List[i][0] - curr_elem[2])) *
                             bin_size) <= NeighborHoodBinThr) and ((
                                 (abs(Final_Rep_Key_List[i][1] - curr_elem[3]))
                                 * bin_size) <= NeighborHoodBinThr):
                            flag = True
                            if 0:
                                print ' --- current element is within neighborhood of the existing (included) bin ', Final_Rep_Key_List[

                    if (flag == False):
                        # there is no such neighborhood constraints
                        # include the bin
                        subl = [curr_elem[2], curr_elem[3]]
                        if 0:
                            print '*** inserted element in the final list: ', str(

                # now print the candidate interactions
                # of the current component
                if 0:
                    print '\n\n**** Printing selected loops of the connected component ***\n\n'

                for i in range(len(Final_Rep_Key_List)):
                    rep_bin_key = (Final_Rep_Key_List[i][0],

                    # fix the representative interaction
                    rep_bin1_low = (rep_bin_key[0] - 1) * bin_size
                    rep_bin1_high = rep_bin_key[0] * bin_size
                    rep_bin2_low = (rep_bin_key[1] - 1) * bin_size
                    rep_bin2_high = rep_bin_key[1] * bin_size
                    cc = CurrChrDict[rep_bin_key]._GetCC()
                    pval = CurrChrDict[rep_bin_key]._GetPVal()
                    qval = CurrChrDict[rep_bin_key]._GetQVal()
                    if 0:
                        print 'Selected bin key: ', rep_bin_key, ' start bin mid: ', (
                            rep_bin1_low + rep_bin1_high
                        ) / 2, ' end bin mid: ', (
                            rep_bin2_low + rep_bin2_high
                        ) / 2, ' cc: ', cc, ' pval: ', pval, ' qval: ', qval

                    # write the interaction in the specified output file
                    fp_outInt.write('\n' + str(curr_chr) + '\t' +
                                    str(rep_bin1_low) + '\t' +
                                    str(rep_bin1_high) + '\t' + str(curr_chr) +
                                    '\t' + str(rep_bin2_low) + '\t' +
                                    str(rep_bin2_high) + '\t' + str(cc) +
                                    '\t' + str(pval) + '\t' + str(qval) +
                                    '\t' + str(span_low_bin1) + '\t' +
                                    str(span_high_bin1) + '\t' +
                                    str(span_low_bin2) + '\t' +
                                    str(span_high_bin2) + '\t' + str(sum_cc) +
                                    '\t' + str(Percent_Significant_BinPair))


    # after processing all the chromosomes, now close the output interaction file

    # # remove the temp log file which stores the max coordinate for a chromosome
    # sys_cmd = "rm " + str(temp_log_file)
    # os.system(sys_cmd)

    # remove the temporary chromosome specific interaction dump file
    sys_cmd = "rm " + str(tempchrdumpfile)

    if 1:
        print '==================== End of merge filtering adjacent interactions !!! ======================'
Example #9
 def get(self):
     """Remove and return the smallest item from the queue.

     The minimum entry is popped from ``self.heap`` and its key is deleted
     from ``self.set`` so that both containers describe the same items.

     Raises:
         IndexError: if ``self.heap`` is empty (raised by ``heapq.heappop``).
         KeyError: if the popped item has no entry in ``self.set``
             (assuming ``self.set`` is a dict-like mapping).
     """
     smallest = heapq.heappop(self.heap)
     del self.set[smallest]
     return smallest
Example #10
    def aStarSearch(self, distance):
        """Best-first (A*-style) search from this node to a node whose
        ``checkEnd()`` is true.

        Neighbours are taken from the ``left``, ``right``, ``up`` and ``down``
        attributes, in that order. Each newly discovered neighbour gets its
        accumulated cost stored through ``updateCosthistory`` and is queued
        with priority ``cost_history + heuristics(distance, end_position)``.

        Args:
            distance: passed through unchanged to ``heuristics``.

        Returns:
            The first dequeued node for which ``checkEnd()`` is true, or
            ``None`` when the start is ``None`` or the frontier runs empty.
        """
        if self is None:
            return None

        path_queue = [(1, self)]
        # States are marked as soon as they are queued, so a state is never
        # enqueued twice and its cost_history is only written once.
        # A list (not a set) is kept because states may be unhashable.
        visited_list = [self.state]
        count = float(0)
        # count / base is a tiny, strictly growing offset added to every
        # priority; presumably it keeps priorities distinct so heapq never
        # has to compare two node objects -- TODO confirm.
        base = float(10000000000)

        while path_queue:
            _, node = heapq.heappop(path_queue)
            if node.checkEnd():
                return node

            for neighbour in (node.left, node.right, node.up, node.down):
                if neighbour is None or neighbour.state in visited_list:
                    continue
                count += 1
                visited_list.append(neighbour.state)
                neighbour.updateCosthistory(
                    node.cost_history + node.pathcost(neighbour))
                # NOTE(review): end_position is not defined in this method;
                # it has to resolve from an enclosing/module scope -- confirm.
                priority = (neighbour.cost_history +
                            neighbour.heuristics(distance, end_position) +
                            count / base)
                heapq.heappush(path_queue, (priority, neighbour))

        # Frontier exhausted without reaching an end node.
        return None
Example #11
    def uniformCostSearch(self):
        """Cost-ordered search from this node to a node whose ``checkEnd()``
        is true.

        Neighbours are taken from the ``left``, ``right``, ``up`` and ``down``
        attributes, in that order, and queued with priority
        ``node.pathcost(neighbour)``.

        NOTE(review): the priority is whatever ``pathcost`` returns for the
        single step; this is only a true uniform-cost search if ``pathcost``
        already reports the accumulated cost -- confirm.

        Returns:
            The first dequeued node for which ``checkEnd()`` is true, or
            ``None`` when the start is ``None`` or the frontier runs empty.
        """
        if self is None:
            return None

        path_queue = [(1, self)]
        # States are marked as soon as they are queued; without this the
        # "not in visited_list" filter never rejects anything and two
        # mutually linked cheap nodes would be re-queued forever.
        # A list (not a set) is kept because states may be unhashable.
        visited_list = [self.state]
        count = float(0)
        # count / base is a tiny, strictly growing offset added to every
        # priority; presumably it keeps priorities distinct so heapq never
        # has to compare two node objects -- TODO confirm.
        base = float(10000000000)

        while path_queue:
            _, node = heapq.heappop(path_queue)
            if node.checkEnd():
                return node

            for neighbour in (node.left, node.right, node.up, node.down):
                if neighbour is None or neighbour.state in visited_list:
                    continue
                count += 1
                visited_list.append(neighbour.state)
                heapq.heappush(
                    path_queue,
                    (node.pathcost(neighbour) + count / base, neighbour))

        # Frontier exhausted without reaching an end node.
        return None
Example #12
 def get(self):
     """Remove and return the smallest item from the queue.

     The minimum entry is popped from ``self.heap`` and its key is deleted
     from ``self.set`` so that both containers describe the same items.

     Raises:
         IndexError: if ``self.heap`` is empty (raised by ``heapq.heappop``).
         KeyError: if the popped item has no entry in ``self.set``
             (assuming ``self.set`` is a dict-like mapping).
     """
     smallest = heapq.heappop(self.heap)
     del self.set[smallest]
     return smallest
Example #13
 def findKthLargest(self, nums, k):
     """Return the k-th largest element of ``nums`` (``k == 1`` is the maximum).

     A sorted copy is indexed with ``-k``, so ``nums`` itself is left
     untouched. The earlier version popped from ``nums`` with
     ``heapq.heappop`` without heapifying it first, which neither yields
     sorted order for arbitrary input nor preserves the caller's list.

     Args:
         nums: sequence of mutually comparable values.
         k: 1-based rank from the top; expected to be in ``1..len(nums)``.

     Raises:
         IndexError: if ``k`` is larger than ``len(nums)``.
     """
     return sorted(nums)[-k]
Example #14
 def search(self, query, return_length=100, passage_len=50, return_urls_only=False):
     """Perform a search on the loaded data and return ranked documents.

     Steps, as implemented below:
       1. Split ``query`` on spaces, normalise each word with ``self.norm``
          and load its posting data from ``self.index``; candidate documents
          are combined with ``join`` across the words found.
       2. Score the candidates with a BM25-like formula and keep the
          ``return_length`` best ones.
       3. Re-score the survivors by the best passage (window of query-word
          positions spanning at most ``passage_len``) and sort descending.

     Args:
         query: query string; words are separated by single spaces.
         return_length: maximum number of candidates kept after step 2.
         passage_len: maximum positional span of a scored passage.
         return_urls_only: drop the score from every returned tuple.

     Returns:
         A list sorted by passage score, best first, of
           * ``(url_id, url, score)`` tuples if ``return_urls_only`` is False
           * ``(url_id, url)`` tuples       if ``return_urls_only`` is True
         An empty list when the query has no words or nothing matches.
     """
     words = [w for w in query.strip().split(" ") if w != '']
     if len(words) == 0:
         return []

     result = None
     word_index = [None] * len(words)
     for z, word in enumerate(words):
         # Only byte strings need decoding; text strings are used as-is.
         if isinstance(word, bytes):
             word = word.decode('utf-8')
         word = self.norm(word.strip())
         if word in self.dictionary:
             self.index.seek(self.dictionary[word], 0)
             compressed = self.index.readline().strip()
             decompressed = None
             if self.encoding == VARBYTE:
                 decompressed = decode_varbyte(base64.b64decode(compressed))
             elif self.encoding == SIMPLE9:
                 decompressed = decode_simple9(base64.b64decode(compressed))
             decompressed, word_index[z] = from_flat(decompressed)
             if result is None:
                 result = decompressed
             else:
                 result = join(result, decompressed)

     k1 = 2
     b = 0.75
     if result is None or len(result) == 0:
         return []

     # Now we have a list of candidates. We apply BM25 to leave only
     # return_length of them.
     # The average length is taken from the first word that has an index;
     # entry [0] of a per-document record is used as the document length and
     # entry [1] as the list of positions -- presumably, confirm with from_flat.
     first_index = next(idx for idx in word_index if idx is not None)
     avg_len = 0.
     for doc in result:
         if doc in first_index:
             avg_len += first_index[doc][0]
     avg_len /= len(result)

     BM25 = [0] * len(result)
     for postings in word_index:
         if postings is None:
             continue
         idf = log(float(self.N) / len(postings))
         for i, doc in enumerate(result):
             if doc in postings:
                 tf = float(len(postings[doc][1])) / postings[doc][0]
                 BM25[i] += tf * idf / (tf + k1 * (b + postings[doc][0] / avg_len * (1 - b)))

     if len(result) > return_length:
         # nlargest keeps exactly the return_length best-ranked candidates;
         # their relative order is irrelevant because of the final sort.
         best = heapq.nlargest(return_length, zip(BM25, result))
         result = [doc for _, doc in best]

     # Now we have a shortened list of candidates. We apply the passage
     # algorithm and keep the best passage score per document.
     scores = [0] * len(result)
     features = [0] * 5
     for i, doc in enumerate(result):
         passage = []
         for j, postings in enumerate(word_index):
             if postings is not None and doc in postings:
                 passage.extend([(x, j) for x in postings[doc][1]])
         # The window test and the inversion feature below both rely on the
         # (position, word number) pairs being in ascending position order.
         passage.sort()
         for l in range(len(passage)):
             for r in range(l, len(passage)):
                 span = passage[r][0] - passage[l][0] + 1
                 if span > passage_len:
                     # Positions only grow with r, so no longer window fits.
                     break
                 passage_w = [x[1] for x in passage[l:r+1]]
                 # share of distinct query words present in the window
                 features[0] = len(set(passage_w)) / float(len(words))
                 # how early the window starts relative to the document length
                 features[1] = 1 - float(passage[l][0]) / word_index[passage[l][1]][doc][0]
                 # 1 - (hits in window / window span)
                 features[2] = 1 - float(r - l + 1) / span
                 # tf-idf of the window, idf normalised by log(N)
                 features[3] = 0
                 for j, postings in enumerate(word_index):
                     if postings is not None:
                         idf = log(float(self.N) / len(postings)) / log(self.N)
                         tf = float(passage_w.count(j)) / span
                         features[3] += tf * idf
                 # fraction of word pairs appearing out of query order
                 features[4] = 0
                 for j in range(len(passage_w) - 1):
                     for k in range(j + 1, len(passage_w)):
                         if passage_w[j] > passage_w[k]:
                             features[4] += 1
                 if len(passage_w) != 1:
                     features[4] /= float(len(passage_w) * (len(passage_w) - 1) / 2)
                 score = sum(features)
                 if score > scores[i]:
                     scores[i] = score

     final_result = []
     for score, url_id in sorted(zip(scores, result), reverse=True):
         final_result.append((url_id, self.urls[url_id], score))
     if return_urls_only:
         final_result = [x[:-1] for x in final_result]
     return final_result