def pre_post_cleansing(rownumber, csvfilename):
    # Show the effect of cleaning on a single row's problem text.
    with open(csvfilename, encoding="utf-8") as f:
        rows = list(csv.DictReader(f))
    before = rows[rownumber]["srvc_req_prob_text"]
    after = wordslist2string(cleanStringAndLemmatize(before))
    print(before, " becomes ", after)
    return before, after
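# Usage sketch (file name hypothetical): compare row 0's raw and cleaned text.
#
#     before, after = pre_post_cleansing(0, "service_requests.csv")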
def onecorpus(csvfilename, num):
    # Build a single corpus: one text file per service-request closer.
    preparecwd(num)
    if not os.path.exists(str(num)):
        os.makedirs(str(num))
    with open(csvfilename, encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            name = row["sr_closer_name"]
            docname = name.replace(",", "_").replace(" ", "")
            filepath = str(num) + "/" + docname + ".txt"
            with open(filepath, "a", encoding="utf-8") as file:
                # write processed string, not raw string from csv
                newline = wordslist2string(cleanStringAndLemmatize(str(row["srvc_req_prob_text"])))
                if len(newline) > 2:
                    newline = newline + " \n "
                    file.write(newline)
def twocorpora(csvfilename, num, percentintraining):
    # Build train/test corpora, assigning each row to a side at random.
    preparecwd(num)
    if not os.path.exists(str(num) + "/TrainCorpus"):
        os.makedirs(str(num) + "/TrainCorpus")
    if not os.path.exists(str(num) + "/TestCorpus"):
        os.makedirs(str(num) + "/TestCorpus")
    with open(csvfilename, encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            name = row["sr_closer_name"]
            docname = name.replace(",", "_").replace(" ", "")
            if random.randrange(0, 100) / 100 < percentintraining:
                filepath = str(num) + "/TrainCorpus" + "/" + docname + ".txt"
            else:
                filepath = str(num) + "/TestCorpus" + "/" + docname + ".txt"
            with open(filepath, "a", encoding="utf-8") as file:
                # write processed string, not raw string from csv
                newline = wordslist2string(cleanStringAndLemmatize(str(row["srvc_req_prob_text"])))
                if len(newline) > 2:
                    newline = newline + " \n "
                    file.write(newline)
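# A minimal usage sketch, assuming a csv named "service_requests.csv"
# (hypothetical) with the "sr_closer_name" and "srvc_req_prob_text" columns
# these functions expect:
#
#     onecorpus("service_requests.csv", 1)          # one corpus under ./1/
#     twocorpora("service_requests.csv", 2, 0.75)   # ~75% train / ~25% test under ./2/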
def match():
    # Flask-style handler: clean the incoming problem description,
    # query the BM25 model, and return the matching TSEs as JSON.
    try:
        print(request)
        myjson = request.get_json(force=True)
        print("this is ", myjson)
    except Exception as inst:
        print(type(inst), inst.args, inst, "getTSEs: failed to parse request JSON")
        return errorResponseVanilla("getTSEs " + str(type(inst)), status=500)
    try:
        print(myjson["problem_description"])
        probdesc = wordslist2string(cleanStringAndLemmatize(myjson["problem_description"]))
        predictions = temp.queryalgorithm(probdesc)
        predictions = [changeback(prediction) for prediction in predictions]
    except Exception as inst:
        return errorResponseVanilla("getTSEs " + str(type(inst)), status=500)
    data = {"error": "", "TSEs": predictions}
    print(data)
    js = json.dumps(data)
    return jsonVanilla(js, status=200)
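# The handler above reads like a Flask view (request.get_json), though the app
# object is defined elsewhere. A hypothetical registration and client call:
#
#     app.add_url_rule("/match", "match", match, methods=["POST"])
#     # curl -X POST -H "Content-Type: application/json" \
#     #      -d '{"problem_description": "server keeps rebooting"}' http://localhost:5000/match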
def __iter__(self):
    # Stream (document name, cleaned text) pairs from the CSV reader.
    for row in self.reader:
        name = row["sr_closer_name"]
        docname = name.replace(",", "_").replace(" ", "")
        newline = wordslist2string(cleanStringAndLemmatize(str(row["srvc_req_prob_text"])))
        yield docname, newline
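# __iter__ above belongs to a corpus-streaming class that is not shown here.
# A minimal sketch of one plausible wrapper (the class name and constructor are
# assumptions; only self.reader matters to the generator above):
#
#     class CsvCorpus:
#         def __init__(self, csvfilename):
#             self.csvfile = open(csvfilename, encoding="utf-8")
#             self.reader = csv.DictReader(self.csvfile)
#
#     for docname, text in CsvCorpus("service_requests.csv"):
#         ...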
# __author__ = 'basil.beirouti'

import csv, datetime, random

from BM25 import last_thousand
from BM25.Scheduling import whos_on, read_filtered_csv_file, read_raw_schedule_csv, write_filtered_csv_file, this_year, docmatrix_data
from BM25.Plugins import tuples_tse_psums_concat
from BM25.TextCleaning import wordslist2string, cleanStringAndLemmatize
from BM25.BM25Okapi import QueryMaster, DocMatrix

def rand_divide(data, proportion):
    # Shuffle the data in place and split it into two groups of sizes
    # proportion and 1 - proportion.
    lendata = len(data)
    numgroup1 = round(proportion * lendata)
    numgroup2 = lendata - numgroup1
    random.shuffle(data)
    group1 = data[:numgroup1]
    group2 = data[numgroup1:]
    assert numgroup1 == len(group1)
    assert numgroup2 == len(group2)
    return group1, group2

rows = read_filtered_csv_file()
on_now = whos_on(rows)
personids = [el[1] for el in on_now]
out = docmatrix_data(personids, 500)
srnums, badgenums, personids, fns, lns, psums, dates = zip(*out)
# (first name + last name, problem summary) pairs
tupsdata = [(el[3] + el[4], el[5]) for el in out]
cleaned_data = [(el[0], wordslist2string(cleanStringAndLemmatize(el[1]))) for el in tupsdata]
train, test = rand_divide(cleaned_data, 0.75)
processed_data = tuples_tse_psums_concat(train)
docmatrix = DocMatrix(processed_data)
def clean_docmatrix_data(raw_data):
    # Build (lastname_firstname, cleaned problem summary) pairs, sorted by name.
    temp = [(el[3].replace(" ", "") + "_" + el[2].replace(" ", ""),
             wordslist2string(cleanStringAndLemmatize(el[4])))
            for el in raw_data]
    temp.sort(key=operator.itemgetter(0))
    return temp
def csv_to_tups(csvfilename):
    # Read the CSV into (closer name, cleaned problem text) tuples, sorted by name.
    with open(csvfilename, encoding="utf-8") as f:
        reader = csv.DictReader(f)
        alldata = [(changename(row["sr_closer_name"]),
                    wordslist2string(cleanStringAndLemmatize(row["srvc_req_prob_text"])))
                   for row in reader]
    alldata = sorted(alldata, key=operator.itemgetter(0))
    return alldata
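# A usage sketch mirroring the pipeline in the script above (file name
# hypothetical; tuples_tse_psums_concat and DocMatrix come from BM25.Plugins
# and BM25.BM25Okapi):
#
#     tups = csv_to_tups("service_requests.csv")
#     docmatrix = DocMatrix(tuples_tse_psums_concat(tups))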