コード例 #1
0
ファイル: megasearch.py プロジェクト: etomm/usntssearch
def summary_results(rawResults, strsearch, logic_items=None, results_stats=None):
    """Flatten per-provider search results and flag each one with an ``ignore`` code.

    Every result dict with a non-``None`` ``'title'`` is modified in place:
    its title is passed through ``SearchModule.sanitize_html``, ``'provid'``
    is set to the index of its provider in ``rawResults`` and ``'ignore'`` is
    set to one of:

    * ``0`` -- kept,
    * ``1`` -- duplicate: a later result has the same sanitized title and a
      ``'size'`` differing by less than 5000000,
    * ``2`` -- rejected: the title does not satisfy the search terms or the
      include/exclude terms from ``logic_items``.

    A result satisfies the search when every "."-separated term of the
    sanitized search string is either one of the title's terms or a prefix
    of one of them.

    Args:
        rawResults: sequence with one list of result dicts per provider.
            Each dict is read through the keys ``'title'``, ``'size'`` and
            ``'providertitle'``.
        strsearch: the search string as entered.
        logic_items: optional sequence of entries whose element 0 is ``'-'``
            (exclude) or ``'+'`` (include) and whose element 1 is the term.
            A result is rejected when its title holds any excluded term, or
            when include terms exist and the title holds none of them.
        results_stats: optional dict filled in place, mapping
            ``str(providertitle)`` to ``[raw result count, count of results
            whose ignore code is not 2]``.

    Returns:
        The flattened list of result dicts (entries with a ``None`` title are
        left out; flagged entries are still included).
    """
    # Use fresh containers per call.  The former ``[]`` / ``{}`` defaults were
    # created once at definition time, so stats written into the default dict
    # leaked from one call into the next.
    if logic_items is None:
        logic_items = []
    if results_stats is None:
        results_stats = {}

    results = []
    titles = []
    sptitle_collection = []

    # Per-provider stats: [number of raw results, number of accepted results].
    for provider_results in rawResults:
        if len(provider_results):
            provider_key = str(provider_results[0]['providertitle'])
            results_stats[provider_key] = [len(provider_results), 0]

    # Merge everything into one list, skipping entries without a title.
    for provid, provider_results in enumerate(rawResults):
        for item in provider_results:
            if item['title'] is None:
                continue
            item['title'] = SearchModule.sanitize_html(item['title'])
            item['provid'] = provid
            title = SearchModule.sanitize_strings(item['title'])
            titles.append(title)
            sptitle_collection.append(Set(title.split(".")))
            results.append(item)

    strsearch1 = SearchModule.sanitize_strings(strsearch)
    strsearch1_collection = Set(strsearch1.split("."))

    # rcount[code] counts results per ignore code *before* the include/exclude
    # terms are applied; it is only used for the log line below.
    rcount = [0] * 3
    for z, item in enumerate(results):
        title_terms = sptitle_collection[z]
        # Search terms that are not title terms verbatim may still match as a
        # prefix of a title term (relaxed search).
        unmatched_terms = strsearch1_collection.difference(title_terms)
        matched = all(
            any(term.startswith(mst) for term in title_terms)
            for mst in unmatched_terms
        )
        item['ignore'] = 0 if matched else 2

        if matched:
            for v in xrange(z + 1, len(results)):
                if titles[z] == titles[v]:
                    sz1 = float(item['size'])
                    sz2 = float(results[v]['size'])
                    if abs(sz1 - sz2) < 5000000:
                        item['ignore'] = 1
                        # One near-identical later entry is enough.
                        break
        rcount[item['ignore']] += 1

    # Include ('+') / exclude ('-') terms.
    exclude_coll = Set([])
    include_coll = Set([])
    for entry in logic_items:
        if entry[0] == '-':
            exclude_coll.add(entry[1])
        if entry[0] == '+':
            include_coll.add(entry[1])

    if len(include_coll) or len(exclude_coll):
        for z, item in enumerate(results):
            if item['ignore'] >= 2:
                continue
            title_terms = sptitle_collection[z]
            if len(include_coll) and len(include_coll.intersection(title_terms)) == 0:
                item['ignore'] = 2
            elif len(exclude_coll) and len(exclude_coll.intersection(title_terms)) > 0:
                item['ignore'] = 2

    mssg = 'Overall search stats: [' + strsearch1 + ']' + ' [' + strsearch + '] ' + str(rcount[0]) + ' ' + str(
        rcount[1]) + ' ' + str(rcount[2])
    log.info(mssg)

    # NOTE(review): this lookup assumes every result of a provider carries the
    # same 'providertitle' as that provider's first raw result -- confirm.
    for item in results:
        if item['ignore'] != 2:
            results_stats[str(item['providertitle'])][1] += 1
    return results
コード例 #2
0
ファイル: megasearch.py プロジェクト: rocksie67/usntssearch
def summary_results(rawResults, strsearch, logic_items=[]):
    """Flatten per-provider results and tag each entry with an ``ignore`` code.

    Each result dict is updated in place: its ``"title"`` is replaced by the
    output of ``SearchModule.sanitize_html`` and ``"ignore"`` is set to

    * ``0`` -- kept,
    * ``1`` -- a later result has the same sanitized title and a ``"size"``
      that differs by less than 5000000,
    * ``2`` -- some "."-separated term of the sanitized search string is not
      among the title's terms, or the entry fails the ``logic_items`` terms.

    ``logic_items`` entries are indexed as ``[0]`` (``"-"`` to exclude,
    ``"+"`` to include) and ``[1]`` (the term).  The summary line is both
    printed and sent to ``log.info``.

    Returns the flattened list of result dicts, flagged ones included.
    """
    results = []
    titles = []
    sptitle_collection = []

    # Merge every provider's entries into one list, sanitizing titles.
    for provider_results in rawResults:
        for entry in provider_results:
            entry["title"] = SearchModule.sanitize_html(entry["title"])
            clean_title = SearchModule.sanitize_strings(entry["title"])
            titles.append(clean_title)
            sptitle_collection.append(Set(clean_title.split(".")))
            results.append(entry)

    strsearch1 = SearchModule.sanitize_strings(strsearch)
    strsearch1_collection = Set(strsearch1.split("."))
    wanted = len(strsearch1_collection)

    # rcount[code] = number of results per ignore code, taken before the
    # include/exclude terms are applied.
    rcount = [0, 0, 0]
    total = len(results)
    for z, entry in enumerate(results):
        shared = strsearch1_collection.intersection(sptitle_collection[z])
        if len(shared) == wanted:
            entry["ignore"] = 0
            # Look ahead for an entry with the same title and similar size.
            for v in xrange(z + 1, total):
                if titles[z] == titles[v]:
                    sz1 = float(entry["size"])
                    sz2 = float(results[v]["size"])
                    if abs(sz1 - sz2) < 5000000:
                        entry["ignore"] = 1
        else:
            entry["ignore"] = 2
        rcount[entry["ignore"]] += 1

    # Split the logic items into include ("+") and exclude ("-") terms.
    exclude_coll = Set([])
    include_coll = Set([])
    for item in logic_items:
        if item[0] == "-":
            exclude_coll.add(item[1])
        if item[0] == "+":
            include_coll.add(item[1])

    # Entries still alive must contain at least one include term ...
    if len(include_coll):
        for z, entry in enumerate(results):
            if entry["ignore"] >= 2:
                continue
            if len(include_coll.intersection(sptitle_collection[z])) == 0:
                entry["ignore"] = 2
    # ... and none of the exclude terms.
    if len(exclude_coll):
        for z, entry in enumerate(results):
            if entry["ignore"] >= 2:
                continue
            if len(exclude_coll.intersection(sptitle_collection[z])) > 0:
                entry["ignore"] = 2

    counts = " ".join([str(rcount[0]), str(rcount[1]), str(rcount[2])])
    mssg = "[" + strsearch1 + "]" + " [" + strsearch + "] " + counts
    print(mssg)
    log.info(mssg)

    return results
コード例 #3
0
ファイル: megasearch.py プロジェクト: mwilborne/usntssearch
def summary_results(rawResults, strsearch, logic_items=[]):
    """Merge the providers' result lists and mark which entries to ignore.

    The result dicts are modified in place.  ``'title'`` is overwritten with
    ``SearchModule.sanitize_html(title)`` and ``'ignore'`` receives:

    * ``0`` when the entry is kept,
    * ``1`` when a later entry has an equal sanitized title and a ``'size'``
      less than 5000000 away,
    * ``2`` when not all "."-separated terms of the sanitized search string
      appear among the title's terms, or when the include/exclude terms in
      ``logic_items`` reject it.

    ``logic_items`` entries are read as ``entry[0]`` (``'+'`` include /
    ``'-'`` exclude) and ``entry[1]`` (the term).  A summary line is printed
    and logged via ``log.info``.

    Returns the merged list; flagged entries remain in it.
    """
    results = []
    titles = []
    sptitle_collection = []

    # One flat list for all providers, with titles sanitized on the way.
    for group in rawResults:
        for hit in group:
            hit['title'] = SearchModule.sanitize_html(hit['title'])
            sanitized = SearchModule.sanitize_strings(hit['title'])
            titles.append(sanitized)
            sptitle_collection.append(Set(sanitized.split(".")))
            results.append(hit)

    strsearch1 = SearchModule.sanitize_strings(strsearch)
    strsearch1_collection = Set(strsearch1.split("."))

    # Tally of ignore codes, computed before logic_items are applied.
    rcount = [0] * 3
    for z, hit in enumerate(results):
        common = strsearch1_collection.intersection(sptitle_collection[z])
        all_terms_found = len(common) == len(strsearch1_collection)
        hit['ignore'] = 0 if all_terms_found else 2

        if all_terms_found:
            # Flag this hit when a later one looks like the same release.
            for v in xrange(z + 1, len(results)):
                if titles[z] == titles[v]:
                    sz1 = float(hit['size'])
                    sz2 = float(results[v]['size'])
                    if abs(sz1 - sz2) < 5000000:
                        hit['ignore'] = 1
        rcount[hit['ignore']] += 1

    # Collect include ('+') and exclude ('-') terms.
    exclude_coll = Set([])
    include_coll = Set([])
    for entry in logic_items:
        operator = entry[0]
        if operator == '-':
            exclude_coll.add(entry[1])
        if operator == '+':
            include_coll.add(entry[1])

    # Include pass: surviving hits need at least one include term.
    if len(include_coll):
        for z, hit in enumerate(results):
            if hit['ignore'] < 2:
                if len(include_coll.intersection(sptitle_collection[z])) == 0:
                    hit['ignore'] = 2
    # Exclude pass: surviving hits must not hold any exclude term.
    if len(exclude_coll):
        for z, hit in enumerate(results):
            if hit['ignore'] < 2:
                if len(exclude_coll.intersection(sptitle_collection[z])) > 0:
                    hit['ignore'] = 2

    tally = str(rcount[0]) + ' ' + str(rcount[1]) + ' ' + str(rcount[2])
    mssg = '[' + strsearch1 + ']' + ' [' + strsearch + '] ' + tally
    print(mssg)
    log.info(mssg)

    return results
コード例 #4
0
ファイル: megasearch.py プロジェクト: JnrnZEDb/NZBMegaSearch
def summary_results(rawResults, strsearch, logic_items=None, results_stats=None):
    """Merge per-provider search results and assign each an ``ignore`` code.

    Result dicts whose ``'title'`` is not ``None`` are updated in place: the
    title is replaced by ``SearchModule.sanitize_html(title)``, ``'provid'``
    becomes the provider's index in ``rawResults`` and ``'ignore'`` becomes:

    * ``0`` -- kept,
    * ``1`` -- duplicate of a later result (equal sanitized title and
      ``'size'`` values less than 5000000 apart),
    * ``2`` -- rejected by the search terms or by ``logic_items``.

    The sanitized search string is split on "."; a result passes when each
    term is one of the title's terms or a prefix of one of them.

    Args:
        rawResults: sequence holding one list of result dicts per provider;
            the keys ``'title'``, ``'size'`` and ``'providertitle'`` are read.
        strsearch: the search string as entered.
        logic_items: optional sequence of entries with element 0 equal to
            ``'+'`` (include) or ``'-'`` (exclude) and element 1 the term.
            Results holding an excluded term are rejected, as are results
            holding none of the include terms when any are given.
        results_stats: optional dict updated in place; maps
            ``str(providertitle)`` to ``[raw result count, number of results
            not rejected]``.

    Returns:
        The merged list of result dicts.  Entries with a ``None`` title are
        omitted; flagged entries are kept in the list.
    """
    # The previous ``[]`` / ``{}`` defaults were shared by every call that
    # omitted the argument, so provider stats accumulated across calls.
    if logic_items is None:
        logic_items = []
    if results_stats is None:
        results_stats = {}

    results = []
    titles = []
    sptitle_collection = []

    # Stats for each provider that returned anything: [raw count, accepted].
    for provider_results in rawResults:
        if len(provider_results):
            first_title = str(provider_results[0]['providertitle'])
            results_stats[first_title] = [len(provider_results), 0]

    # All in one list; entries without a title are dropped.
    for provid, provider_results in enumerate(rawResults):
        for item in provider_results:
            if item['title'] is None:
                continue
            item['title'] = SearchModule.sanitize_html(item['title'])
            item['provid'] = provid
            title = SearchModule.sanitize_strings(item['title'])
            titles.append(title)
            sptitle_collection.append(Set(title.split(".")))
            results.append(item)

    strsearch1 = SearchModule.sanitize_strings(strsearch)
    strsearch1_collection = Set(strsearch1.split("."))

    # rcount is indexed by ignore code and reflects the state before the
    # include/exclude terms run; it only feeds the log message.
    rcount = [0] * 3
    for z, item in enumerate(results):
        title_terms = sptitle_collection[z]

        # Relaxed search: a term missing from the title's terms still counts
        # when some title term starts with it.
        matched = True
        for mst in strsearch1_collection.difference(title_terms):
            if not any(term.startswith(mst) for term in title_terms):
                matched = False
                break

        if matched:
            item['ignore'] = 0
            for v in xrange(z + 1, len(results)):
                if titles[z] == titles[v]:
                    sz1 = float(item['size'])
                    sz2 = float(results[v]['size'])
                    if abs(sz1 - sz2) < 5000000:
                        item['ignore'] = 1
                        # Already flagged; no need to scan further.
                        break
        else:
            item['ignore'] = 2
        rcount[item['ignore']] += 1

    # Logic params: '+' terms to include, '-' terms to exclude.
    exclude_coll = Set([])
    include_coll = Set([])
    for entry in logic_items:
        if entry[0] == '-':
            exclude_coll.add(entry[1])
        if entry[0] == '+':
            include_coll.add(entry[1])

    if len(include_coll) or len(exclude_coll):
        for z, item in enumerate(results):
            if item['ignore'] >= 2:
                continue
            title_terms = sptitle_collection[z]
            if len(include_coll) and len(include_coll.intersection(title_terms)) == 0:
                item['ignore'] = 2
            elif len(exclude_coll) and len(exclude_coll.intersection(title_terms)) > 0:
                item['ignore'] = 2

    mssg = 'Overall search stats: [' + strsearch1 + ']' + ' [' + strsearch + '] ' + str(
        rcount[0]) + ' ' + str(rcount[1]) + ' ' + str(rcount[2])
    log.info(mssg)

    # NOTE(review): assumes each result's 'providertitle' equals that of its
    # provider's first raw result, otherwise this lookup fails -- confirm.
    for item in results:
        if item['ignore'] != 2:
            results_stats[str(item['providertitle'])][1] += 1
    return results