Ejemplo n.º 1
0
def _advance_with_skips(postings, ptr, target):
    """
    return the next position to examine in postings, given that the doc ID
    at position ptr is smaller than target

    skip pointers are followed for as long as the entry they point at still
    has a doc ID <= target; when no skip can be taken the position simply
    moves forward by one
    """
    jumped = False
    # len(entry) == 2 means the entry has a skip pointer, stored in entry[1]
    while (len(postings[ptr]) == 2
           and postings[postings[ptr][1]][0][0] <= target):
        ptr = postings[ptr][1]
        jumped = True
    if jumped:
        return ptr
    return ptr + 1


def intersect_with_skips(p1, p2):
    """
    algorithm in figure 2.10 of IIR
    intersect two postings lists together

    p1, p2: postings lists; for every entry, entry[0][0] is read as the
            doc ID and, when len(entry) == 2, entry[1] is used as a skip
            pointer (an index into the same list)

    returns [] when either list is empty or no doc ID is shared; otherwise
    the shared doc IDs are passed through index.generate_skip_list and each
    resulting entry is wrapped in a one-element list
    """
    if not p1 or not p2:
        return []

    answer = []
    ptr1 = 0
    ptr2 = 0
    while ptr1 < len(p1) and ptr2 < len(p2):
        doc1 = p1[ptr1][0][0]
        doc2 = p2[ptr2][0][0]
        if doc1 == doc2:
            answer.append(doc1)
            ptr1 += 1
            ptr2 += 1
        elif doc1 < doc2:
            # the skip target is compared by its doc ID ([0][0]); comparing
            # the whole posting ([0]) against a doc ID never allowed a skip
            ptr1 = _advance_with_skips(p1, ptr1, doc2)
        else:
            ptr2 = _advance_with_skips(p2, ptr2, doc1)

    if not answer:
        return []
    return [[e] for e in index.generate_skip_list(answer, always_insert_skips)]
Ejemplo n.º 2
0
Archivo: search.py Proyecto: dw6/NUS
def union_with_skips(p1, p2):
    """
    union two postings lists together

    p1, p2: postings lists; entry[0] of every entry is taken as the doc ID

    returns the result of index.generate_skip_list applied to the sorted,
    de-duplicated doc IDs of both lists
    """
    # a set replaces the dict-of-ones, and sorted() replaces keys().sort(),
    # which only works where dict.keys() returns a list
    doc_ids = set(entry[0] for entry in p1)
    doc_ids.update(entry[0] for entry in p2)
    return index.generate_skip_list(sorted(doc_ids), always_insert_skips)
Ejemplo n.º 3
0
def union_with_skips(p1, p2):
    """
    union two postings lists together

    p1, p2: postings lists; entry[0][0] of every entry is taken as the
            doc ID

    returns a list of one-element lists, each wrapping an entry produced by
    index.generate_skip_list from the sorted, de-duplicated doc IDs
    """
    # collect the distinct doc IDs of both lists; a set replaces the
    # dict-of-ones, and sorted() replaces keys().sort(), which only works
    # where dict.keys() returns a list
    doc_ids = set(entry[0][0] for entry in p1)
    doc_ids.update(entry[0][0] for entry in p2)
    with_skips = index.generate_skip_list(sorted(doc_ids), always_insert_skips)
    return [[e] for e in with_skips]
Ejemplo n.º 4
0
def process_NOT(a):
    """
    return the postings for every doc ID that does not occur in a

    a: either a list of postings (entry[0][0] is taken as the doc ID), or a
       key of word_dict; in the latter case the postings are read from the
       module-level file object f, seeking to word_dict[a][1] and reading
       word_dict[a][2] bytes, and parsed with singleline

    the list of all doc IDs is read from the start of f (all_length bytes)
    while the global all_fids is still None, and cached there afterwards

    returns a list of one-element lists, each wrapping an entry produced by
    index.generate_skip_list
    """
    global all_fids
    if all_fids is None:
        # prints the number of bytes about to be read; parenthesised so the
        # statement parses on both Python 2 and Python 3
        print(all_length)
        f.seek(0)
        all_fids = [pair[0][0] for pair in singleline(f.read(all_length))]

    if not isinstance(a, list):
        f.seek(word_dict[a][1])
        a_fids = [pair[0][0] for pair in singleline(f.read(word_dict[a][2]))]
    else:
        a_fids = [pair[0][0] for pair in a]

    # set membership avoids rescanning a_fids for every doc ID
    # (assumes doc IDs are hashable -- TODO confirm)
    excluded = set(a_fids)
    other_fids = [fid for fid in all_fids if fid not in excluded]
    return [[e] for e in index.generate_skip_list(other_fids, always_insert_skips)]
Ejemplo n.º 5
0
def process_NOT(a):
    """
    compute the complement of a with respect to the list of all doc IDs

    a: a postings list (doc ID read as entry[0][0]) or, when it is not a
       list, a key into word_dict whose postings are loaded from the
       module-level file object f: word_dict[a][1] is the seek offset and
       word_dict[a][2] the byte count handed to f.read, and the bytes are
       parsed by singleline

    side effect: when the global all_fids is None it is filled from the
    first all_length bytes of f and kept for later calls

    returns one single-element list per entry that
    index.generate_skip_list produces for the remaining doc IDs
    """
    global all_fids
    if all_fids is None:
        # emits the byte length being read; written with parentheses so it
        # is valid syntax on Python 2 and Python 3 alike
        print(all_length)
        f.seek(0)
        all_fids = [pair[0][0] for pair in singleline(f.read(all_length))]

    if isinstance(a, list):
        a_fids = [pair[0][0] for pair in a]
    else:
        f.seek(word_dict[a][1])
        a_fids = [pair[0][0] for pair in singleline(f.read(word_dict[a][2]))]

    # one set lookup per doc ID instead of a linear scan of a_fids
    # (assumes doc IDs are hashable -- TODO confirm)
    to_remove = set(a_fids)
    other_fids = [fid for fid in all_fids if fid not in to_remove]
    return [[e]
            for e in index.generate_skip_list(other_fids, always_insert_skips)]
Ejemplo n.º 6
0
Archivo: search.py Proyecto: dw6/NUS
def process_NOT(a):
    """
    return the postings for every doc ID that does not occur in a

    a: either a list of postings (entry[0] is taken as the doc ID), or a
       value whose first item a[0] is a key of word_dict; in the latter
       case word_dict[a[0]][1] and word_dict[a[0]][2] give the start offset
       and byte count of the postings inside postings_file, which are
       decoded with compress.uncompress_postings_list

    the list of all doc IDs is loaded from line 1 of "all_id.txt" while the
    global all_fids is still None, and cached there afterwards

    returns the result of index.generate_skip_list for the remaining doc IDs
    """
    global all_fids
    if all_fids is None:
        # NOTE(review): eval() executes whatever the file contains; only
        # safe while all_id.txt is produced by trusted code
        all_fids = [pair[0] for pair in eval(linecache.getline("all_id.txt", 1))]

    if not isinstance(a, list):
        # NOTE(review): the lookup key is a[0]; for a plain string that is
        # its first character -- confirm what callers pass here
        entry = word_dict[a[0]]
        start_byte = entry[1]
        num_bytes = entry[2]

        # the with-block closes the file handle, which was leaked before
        with open(postings_file) as fh:
            fh.seek(start_byte)
            bytestream = fh.read(num_bytes)
        lst = compress.uncompress_postings_list(bytestream)

        a_fids = [pair[0] for pair in lst]
    else:
        a_fids = [pair[0] for pair in a]

    # set membership avoids rescanning a_fids for every doc ID
    # (assumes doc IDs are hashable -- TODO confirm)
    excluded = set(a_fids)
    other_fids = [fid for fid in all_fids if fid not in excluded]
    return index.generate_skip_list(other_fids, always_insert_skips)
Ejemplo n.º 7
0
def intersect_with_skips(p1, p2):
    """
    algorithm in figure 2.10 of IIR
    intersect two postings lists together

    p1, p2: postings lists; entry[0][0] of an entry is read as its doc ID,
            and an entry of length 2 carries a skip pointer in entry[1]
            (an index into the same list)

    returns [] if either list is empty or there is no common doc ID;
    otherwise one single-element list per entry that
    index.generate_skip_list produces for the common doc IDs
    """
    if not p1 or not p2:
        return []

    answer = []
    ptr1 = 0
    ptr2 = 0
    while ptr1 < len(p1) and ptr2 < len(p2):
        doc1 = p1[ptr1][0][0]
        doc2 = p2[ptr2][0][0]
        if doc1 == doc2:
            answer.append(doc1)
            ptr1 += 1
            ptr2 += 1
        elif doc1 < doc2:
            # len(p1[ptr1]) == 2 means hasSkip, p1[ptr1][1] is the skip
            # pointer; the skip target is compared by its doc ID ([0][0]),
            # not by the whole posting, so that skips can actually be taken
            if len(p1[ptr1]) == 2 and p1[p1[ptr1][1]][0][0] <= doc2:
                while len(p1[ptr1]) == 2 and p1[p1[ptr1][1]][0][0] <= doc2:
                    ptr1 = p1[ptr1][1]
            else:
                ptr1 += 1
        else:
            # same skip handling, with the roles of the two lists swapped
            if len(p2[ptr2]) == 2 and p2[p2[ptr2][1]][0][0] <= doc1:
                while len(p2[ptr2]) == 2 and p2[p2[ptr2][1]][0][0] <= doc1:
                    ptr2 = p2[ptr2][1]
            else:
                ptr2 += 1

    if not answer:
        return []
    return [[e]
            for e in index.generate_skip_list(answer, always_insert_skips)]