Example #1
import phrasefinder as pf


def main():
    """Requests the PhraseFinder web service and prints out the result."""

    # Set up your query.
    query = 'I like'

    # Optional: set the maximum number of phrases to return.
    options = pf.SearchOptions()
    options.topk = 10

    # Send the request.
    try:
        result = pf.search(pf.Corpus.AMERICAN_ENGLISH, query, options)
        if result.status != pf.Status.OK:
            print('Request was not successful: {}'.format(result.status))
            return

        # Print phrases line by line.
        for phrase in result.phrases:
            print("{0:6f}".format(phrase.score), end="")
            for token in phrase.tokens:
                print(" {}".format(token.text), end="")
            print()

    except Exception as error:
        print('Some error occurred: {}'.format(error))
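
To run this example as a standalone script, a minimal sketch (it assumes the code above, including the import, is saved in its own file):

if __name__ == '__main__':
    main()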
Example #2
def main(query, resultdict):
    """Looks up query with PhraseFinder and stores its total match count in resultdict (0 on failure)."""

    # Set the maximum number of phrases to return.
    options = pf.SearchOptions()
    options.topk = 1

    # Send the request.
    try:
        result = pf.search(pf.Corpus.AMERICAN_ENGLISH, query, options)
        if result.status != pf.Status.OK:
            resultdict[query] = 0
            return

        # Accumulate the total match count for this query.
        for phrase in result.phrases:
            if query in resultdict:
                resultdict[query] += phrase.match_count
            else:
                resultdict[query] = phrase.match_count

        if query not in resultdict:
            resultdict[query] = 0

    except Exception as error:
        resultdict[query] = 0
        return
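
A hypothetical call sketch for the function above, assuming import phrasefinder as pf at module level (the query string is only an illustration):

counts = {}
main('ice cream', counts)
print(counts)  # e.g. {'ice cream': <match count of the top phrase>} or {'ice cream': 0} on failure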
Example #3
def pf_query(query='I like ?'):
    """Queries the PhraseFinder web service and returns the result."""
    q = query
    result = []
    # Perform a request ('params' is assumed to be defined at module level).
    try:
        response = phrasefinder.search(q, params)
        if response.status != phrasefinder.Status.Ok:
            print('Request was not successful: {}'.format(response.status))
            return result
        # Print phrases line by line.
        # TODO: light pre-processing (relFreq, organizing, etc.)
        for phrase in response.phrases:
            # 'ngram_totals' (total counts per n-gram length) is assumed to be defined elsewhere.
            phrase_relFreq = phrase.match_count / ngram_totals[len(phrase.tokens) - 1]
            print("{0} {1:6f} {2:6f}".format(phrase.match_count, phrase.score, phrase_relFreq), end="")
            for token in phrase.tokens:
                print(' {}_{}'.format(token.text, token.tag), end="")
            print()
        # Example output:
        #   1065105 0.268530 0.002543 I_0 like_0 to_1
        #   484768 0.122218 0.001157 I_0 like_0 the_1
        #   ...
        # Token tag meaning:
        # 0 => Given
        # 1 => Inserted
        # 2 => Alternative
        # 3 => Completed
    except Exception as error:
        # Catch-all for connection issues, a malformed query, or something else unforeseen.
        print('Some error occurred: {}'.format(error))
    return result
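
A hypothetical call for the function above; note that until the TODO is filled in, it prints the matching phrases but returns an empty list:

rows = pf_query('I like ?')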
Example #4
def ngrams(words, quiet=True):
    """Returns parallel lists of PhraseFinder match and volume counts for each term in words."""
    match = []
    volume = []

    word_count = len(words)
    counter = 1
    for x in words:

        if not quiet:
            sys.stdout.write("\r%d/%d" % (counter, word_count))
            sys.stdout.flush()

        match_str = '1'
        vol_str = '1'

        try:
            # search for term x through Google Ngrams using phrasefinder
            result = phrasefinder.search(x)

            if result.status == phrasefinder.Status.Ok:
                if len(result.phrases) > 0:
                    match_str = result.phrases[0].match_count
                    vol_str = result.phrases[0].volume_count
        except Exception:
            match_str = '-1'
            vol_str = '-1'

        match.append(match_str)
        volume.append(vol_str)

        counter += 1

    return match, volume
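
A hypothetical call; the excerpt assumes import sys and import phrasefinder at module level:

match_counts, volume_counts = ngrams(['coffee', 'teapot'], quiet=False)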
Example #5
def qryGoogle(qryStr):
    """Requests the PhraseFinder web service and prints out the result."""
    # Set up your query.
    query = qryStr

    # Optional: set the maximum number of phrases to return.
    options = pf.SearchOptions()
    # options.format = 'tsv'
    options.topk = 1
    retval = 0

    # Send the request.
    try:
        result = pf.search(pf.Corpus.AMERICAN_ENGLISH, query, options)
        if result.status != pf.Status.OK:
            print('Request was not successful: {}'.format(result.status))
            return retval
        if len(result.phrases) == 0:
            retval = 0
        else:
            retval = result.phrases[0].match_count

    except Exception as error:
        print('Some error occurred: {}'.format(error))
        raise
    
    return retval
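
A hypothetical call, assuming import phrasefinder as pf at module level:

print(qryGoogle('blue whale'))  # match count of the top phrase, or 0 if nothing matched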
Example #6
def frequency_score(temp):
    """Scores temp by its best PhraseFinder phrase score, weighted exponentially by length."""
    repo = [temp]
    sum = 0
    '''for l in range(4, len(temp)):
        for i in range(0, len(temp)-l+1):
            substr = temp[i:i + l]
            # print(substr)
            repo.append(substr)
    '''
    for word in repo:
        query = word
        options = pf.SearchOptions()
        result = pf.search(pf.Corpus.AMERICAN_ENGLISH, query, options)
        max_no = -1
        for phrase in result.phrases:
            max_no = max(phrase.score, max_no)
        if max_no < 0:
            continue
        sum += pow(2, len(word)) * max_no

    return sum
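
A hypothetical call; the argument is only an illustration:

print(frequency_score('once upon a time'))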
Example #7
def google_corpus_freq(temp):
    """Sums length-weighted best PhraseFinder scores over short (3- and 4-character) substrings of temp."""
    repo = []
    sum = 0

    for l in range(3, min(len(temp), 5)):
        for i in range(0, len(temp) - l):
            substr = temp[i:i + l]
            repo.append(substr)

    for word in repo:
        query = word
        options = pf.SearchOptions()
        result = pf.search(pf.Corpus.AMERICAN_ENGLISH, query, options)
        max_no = -1
        for phrase in result.phrases:
            max_no = max(phrase.score, max_no)
        if max_no < 0:
            continue
        sum += pow(2, len(word)) * max_no

    return sum
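
A hypothetical call; the function queries the 3- and 4-character substrings of its argument:

print(google_corpus_freq('thequickbrownfox'))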
Example #8
def new_main(query, resultdict):
    """Stores the match count of each of the top matching phrases in resultdict, keyed by phrase text."""

    # Set the maximum number of phrases to return.
    options = pf.SearchOptions()
    options.topk = 30

    # Send the request.
    try:
        result = pf.search(pf.Corpus.AMERICAN_ENGLISH, query, options)
        if result.status != pf.Status.OK:
            return

        for phrase in result.phrases:
            skey = ""
            for token in phrase.tokens:
                skey = skey + token.text + " "
            resultdict[skey] = phrase.match_count

    except Exception as error:
        return
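
A hypothetical call sketch; keys in the result keep a trailing space because of how skey is built:

phrase_counts = {}
new_main('I like ?', phrase_counts)
# phrase_counts now maps each returned phrase, e.g. 'I like to ', to its match count.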
Example #9
def ngram_search(word1, word2):
    """Returns the summed match count of phrases containing word1 and word2 (either order, both English corpora)."""
    options = pf.SearchOptions()
    options.topk = 100  # The maximum number of phrases to return.
    query = "*" + word1 + "*" + word2 + "*"
    query_rev = "*" + word2 + "*" + word1 + "*"

    langs = [pf.Corpus.AMERICAN_ENGLISH, pf.Corpus.BRITISH_ENGLISH]
    queries = [query, query_rev]

    counts = []
    try:
        for query in queries:
            for lang in langs:
                result = pf.search(lang, query, options)
                if result.status != pf.Status.OK:
                    print('Request was not successful: {}'.format(
                        result.status))
                    return
                for phrase in result.phrases:
                    counts.append(phrase.match_count)
    except Exception as error:
        print('Some error in querying occurred: {}'.format(error))
    return np.sum(np.array(counts))
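
A hypothetical call; the excerpt assumes import numpy as np and import phrasefinder as pf at module level:

total = ngram_search('strong', 'coffee')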
Example #10
    # 'lines', 'outF', 'count', and the 'pf' import are assumed to be defined earlier in the script.
    for s in lines:
        # Take the prefix of s up to the first '.'.
        j = 0
        for j in range(0, len(s)):
            if s[j] == '.':
                break
        temp = s[0:j]
        repo = []
        sum = 0
        for l in range(3, min(len(temp), 5)):
            for i in range(0, len(temp) - l):
                substr = temp[i:i + l]
                repo.append(substr)
        for word in repo:
            query = word
            options = pf.SearchOptions()
            result = pf.search(pf.Corpus.AMERICAN_ENGLISH, query, options)
            maxno = -1
            for phrase in result.phrases:
                maxno = max(phrase.score, maxno)
            if maxno < 0:
                continue
            sum += pow(10, len(word)) * maxno

        outF.write(str(sum))

        outF.write("\n")
        count = count + 1
        print(count)

        if count == 1000:
            break