import csv
import pickle

from Other import Analysis

# Train and persist the "#RepeatBorrower" classifier.
#
# Rows whose "RB" column equals "1" are excluded: their row indices are
# collected in ``toremove`` and no label is produced for them. Every other
# row is labelled 1 when "#RepeatBorrower" appears in its "Tags" column and
# 0 otherwise.
with open(
        "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/"
        "loans_assigned_for_tagging_with_descriptions.csv") as loans_file:
    loans = csv.DictReader(loans_file)
    labels = []
    toremove = []
    for i, loan in enumerate(loans):
        if loan["RB"] == "1":
            toremove.append(i)
            continue
        labels.append(1 if "#RepeatBorrower" in loan["Tags"] else 0)

forest, vectorizer, selector = Analysis.initialize(
    "loans_assigned_for_tagging_with_descriptions", labels,
    "Description", toremove)

# Write each artifact inside a ``with`` block so the file is flushed and
# closed deterministically instead of relying on garbage collection.
for artifact, path in (
        (forest,
         "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Forests/"
         "RBForest"),
        (vectorizer,
         "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Vectorizers/"
         "RBVectorizer"),
        (selector,
         "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Selectors/"
         "RBSelector"),
):
    with open(path, "wb+") as out_file:
        pickle.dump(artifact, out_file)
                (loan["Partner Name"] == "iDE Cambodia"
                 or loan["Partner Name"] == "TerraClear Development")
            and "water filter" in loan["Use"]):
        toremove.append(i)
        continue
    # Exclude loans whose activity is one of the categories listed below:
    # the row index is recorded in ``toremove`` and no label is appended.
    if activity == "Used Clothing" or activity == "Used Shoes" \
            or activity == "Bicycle Sales" \
            or activity == "Renewable Energy Products" \
            or activity == "Recycled Materials" \
            or activity == "Recycling":
        toremove.append(i)
        continue
    # Remaining rows get a binary label: 1 when "#Eco-friendly" appears in
    # the "Tags" column, 0 otherwise.
    if "#Eco-friendly" in loan["Tags"]:
        labels.append(1)
    else:
        labels.append(0)
# Train the "#Eco-friendly" classifier on the "Use" column with the labels
# and excluded row indices collected above, then persist the three artifacts.
forest, vectorizer, selector = Analysis.initialize(
    "loans_assigned_for_tagging_with_descriptions_combined2", labels,
    "Use", toremove, 250, class_weight="balanced")

# Write each artifact inside a ``with`` block so the file is flushed and
# closed deterministically instead of relying on garbage collection.
for artifact, path in (
        (forest,
         "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Forests/"
         "EFForest"),
        (vectorizer,
         "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Vectorizers/"
         "EFVectorizer"),
        (selector,
         "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Selectors/"
         "EFSelector"),
):
    with open(path, "wb+") as out_file:
        pickle.dump(artifact, out_file)
예제 #3
0
# Evaluate the saved "#HealthAndSanitation" model against tagged loans.
#
# For every loan outside the "Health" sector whose "Use" text can be
# processed, the model's prediction is thresholded at 0.6; predictions that
# pass are counted as correct when the loan already carries the
# "#HealthAndSanitation" tag, and the loan's link is printed otherwise.

# NOTE(review): pickle.load can execute arbitrary code; only load artifacts
# produced by this project.
with open(
        "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Forests/HASForest",
        "rb") as model_file:
    forest = pickle.load(model_file)
print(forest.best_params_)
with open(
        "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Vectorizers/HASVectorizer",
        "rb") as model_file:
    vectorizer = pickle.load(model_file)
with open(
        "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Selectors/HASSelector",
        "rb") as model_file:
    selector = pickle.load(model_file)

# The counters must exist before the loop increments them; without this the
# first ``correct += 1`` / ``total += 1`` raises NameError.
correct = 0
total = 0

with open(
        "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/loans_assigned_for_tagging_with_descriptions_new.csv") as loans_file:
    loans = csv.DictReader(loans_file)
    for loan in tqdm(loans):
        if loan["Sector"] == "Health":
            continue
        modified = [Analysis.modify(loan["Use"])]
        # Analysis.modify returning None means there is nothing to classify.
        if modified == [None]:
            continue
        modified = vectorizer.transform(modified)
        modified_and_selected = selector.transform(modified).toarray()
        prediction = forest.predict_proba(modified_and_selected)
        # Skip predictions below the 0.6 threshold (presumably the
        # positive-class probability -- confirm against the model's classes).
        if prediction[0][1] < .6:
            continue
        if "#HealthAndSanitation" in loan["Tags"]:
            correct += 1
        else:
            print(total, loan["Raw Link"])
        total += 1
        # total was just incremented, so the division below cannot hit zero.
        print(correct, total)
        print(correct / total)
from Other import Analysis
import pickle

# Train and persist the "#Schooling" classifier.
#
# Loans in the "Education" sector are excluded: their row indices are
# collected in ``toremove`` and no label is produced for them. Every other
# row is labelled 1 when "#Schooling" appears in its "Tags" column and 0
# otherwise.
with open(
        "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/"
        "loans_assigned_for_tagging_with_descriptions_combined3.csv") as loans_file:
    loans = csv.DictReader(loans_file)
    labels = []
    toremove = []
    for i, loan in enumerate(loans):
        if loan["Sector"] == "Education":
            toremove.append(i)
            continue
        labels.append(1 if "#Schooling" in loan["Tags"] else 0)

forest, vectorizer, selector = Analysis.initialize(
    "loans_assigned_for_tagging_with_descriptions_combined3", labels,
    "Description", toremove, n_estimators=25)

# Write each artifact inside a ``with`` block so the file is flushed and
# closed deterministically instead of relying on garbage collection.
for artifact, path in (
        (forest,
         "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Forests/"
         "ScForest"),
        (vectorizer,
         "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Vectorizers/"
         "ScVectorizer"),
        (selector,
         "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Selectors/"
         "ScSelector"),
):
    with open(path, "wb+") as out_file:
        pickle.dump(artifact, out_file)
예제 #5
0
from Other import Analysis
import pickle

# Build the "S" bag-of-words CSV and train/persist the matching classifier.
#
# Each loan becomes one row of (Loan ID, Description, value), where value is
# 1 when the tags contain "#Single" or "#SingleParent" and 0 otherwise.
#
# The output file is opened with newline="" as the csv module requires, and
# is closed before training starts so every row is flushed to disk.
with open(
        "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/BagOfWords/"
        "SBagOfWords.csv",
        "w+", newline="") as bag_file:
    writer = csv.writer(bag_file)
    writer.writerow(["id", "description", "value"])
    ids = []
    with open(
            "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/"
            "loans_assigned_for_tagging_with_descriptions.csv") as loans_file:
        loans = csv.DictReader(loans_file)
        for loan in loans:
            tags = loan["Tags"]
            value = 1 if ("#Single" in tags or "#SingleParent" in tags) else 0
            writer.writerow([loan["Loan ID"], loan["Description"], value])

# NOTE(review): Analysis.initialize("S") presumably reads SBagOfWords.csv;
# the file is fully written and closed at this point either way.
forest, vectorizer, selector = Analysis.initialize("S")

# Write each artifact inside a ``with`` block so the file is flushed and
# closed deterministically instead of relying on garbage collection.
for artifact, path in (
        (forest,
         "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Forests/"
         "SForest"),
        (vectorizer,
         "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Vectorizers/"
         "SVectorizer"),
        (selector,
         "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Selectors/"
         "SSelector"),
):
    with open(path, "wb+") as out_file:
        pickle.dump(artifact, out_file)
예제 #6
0
# Build the "P" bag-of-words CSV and train/persist the matching classifier.
#
# Loans for which GetAge.GetAge returns a truthy age of 50 or more are
# skipped. Every other loan becomes one row of (Loan ID, Description, value),
# where value is 1 when the tags contain "#Parent" or "#SingleParent" and 0
# otherwise.
#
# The output file is opened with newline="" as the csv module requires, and
# is closed before training starts so every row is flushed to disk.
with open(
        "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/BagOfWords/"
        "PBagOfWords.csv",
        "w+", newline="") as bag_file:
    writer = csv.writer(bag_file)
    writer.writerow(["id", "description", "value"])
    ids = []
    with open(
            "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/"
            "loans_assigned_for_tagging_with_descriptions.csv") as loans_file:
        loans = csv.DictReader(loans_file)
        for loan in tqdm(loans):
            age = GetAge.GetAge(loan["Description"])
            if age and age >= 50:
                continue
            tags = loan["Tags"]
            value = 1 if ("#Parent" in tags or "#SingleParent" in tags) else 0
            writer.writerow([loan["Loan ID"], loan["Description"], value])

# NOTE(review): Analysis.initialize("P", ...) presumably reads
# PBagOfWords.csv; the file is fully written and closed at this point.
forest, vectorizer, selector = Analysis.initialize("P", [250, 2])

# Write each artifact inside a ``with`` block so the file is flushed and
# closed deterministically instead of relying on garbage collection.
for artifact, path in (
        (forest,
         "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Forests/"
         "PForest"),
        (vectorizer,
         "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Vectorizers/"
         "PVectorizer"),
        (selector,
         "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Selectors/"
         "PSelector"),
):
    with open(path, "wb+") as out_file:
        pickle.dump(artifact, out_file)
예제 #7
0
loans = csv.DictReader(open(
    "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/loans_assigned_for_tagging_with_descriptions.csv"))

# Load the persisted "S" forest, vectorizer and selector.
#
# NOTE(review): the variables carry a "p" prefix although the files loaded
# are the S* artifacts; the names are kept because later code refers to them.
# NOTE(review): pickle.load can execute arbitrary code; only load artifacts
# produced by this project.
#
# Each file is read inside a ``with`` block so the handle is closed as soon
# as the object has been loaded.
with open(
        "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Forests/SForest",
        "rb") as model_file:
    pforest = pickle.load(model_file)
with open(
        "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Vectorizers/SVectorizer",
        "rb") as model_file:
    pvectorizer = pickle.load(model_file)
with open(
        "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Selectors/SSelector",
        "rb") as model_file:
    pselector = pickle.load(model_file)

for loan in tqdm(loans):
    modified = [Analysis.modify(loan["Description"])]
    if modified != [None]:
        pmodified = pvectorizer.transform(modified)
        pmodified_and_selected = pselector.transform(pmodified).toarray()
        pprediction = pforest.predict_proba(pmodified_and_selected)
    else:
        continue
    if pprediction[0][1] < 0.5:
        continue
    if "#Single" in loan["Tags"] or "#SingleParent" in loan["Tags"]:
        correct += 1
    else:
        print(loan["Raw Link"])
    total += 1
    print(correct, total)
    try: