# Builds 0/1 labels for the #RepeatBorrower tag from the loans CSV, hands them
# to Analysis.initialize, and pickles the three objects it returns.
import csv
import pickle

from Other import Analysis


def _save_pickle(obj, path):
    """Pickle *obj* to *path*, closing the file so the data is flushed."""
    with open(path, "wb+") as handle:
        pickle.dump(obj, handle)


labels = []    # one entry per kept row: 1 if tagged #RepeatBorrower, else 0
toremove = []  # indices of skipped rows, forwarded to Analysis.initialize

# newline="" is the csv module's documented way to open files it parses, so
# line breaks embedded in quoted fields are handled by the reader itself.
with open(
        "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/"
        "loans_assigned_for_tagging_with_descriptions.csv",
        newline="") as loans_file:
    loans = csv.DictReader(loans_file)
    for i, loan in enumerate(loans):
        # Rows with RB == "1" receive no label; only their index is recorded.
        if loan["RB"] == "1":
            toremove.append(i)
            continue
        labels.append(1 if "#RepeatBorrower" in loan["Tags"] else 0)

# NOTE(review): Analysis.initialize is defined elsewhere; presumably it fits
# the model on the named dataset's "Description" column -- confirm.
forest, vectorizer, selector = Analysis.initialize(
    "loans_assigned_for_tagging_with_descriptions", labels, "Description",
    toremove)

_save_pickle(
    forest,
    "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Forests/RBForest")
_save_pickle(
    vectorizer,
    "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Vectorizers/"
    "RBVectorizer")
_save_pickle(
    selector,
    "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Selectors/"
    "RBSelector")
(loan["Partner Name"] == "iDE Cambodia" or loan["Partner Name"] == "TerraClear Development") and "water filter" in loan["Use"]): toremove.append(i) continue if activity == "Used Clothing" or activity == "Used Shoes" \ or activity == "Bicycle Sales" \ or activity == "Renewable Energy Products" \ or activity == "Recycled Materials" \ or activity == "Recycling": toremove.append(i) continue if "#Eco-friendly" in loan["Tags"]: labels.append(1) else: labels.append(0) forest, vectorizer, selector = Analysis.initialize( "loans_assigned_for_tagging_with_descriptions_combined2", labels, "Use", toremove, 250, class_weight="balanced") pickle.dump(forest, open( "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Forests/EFForest", "wb+")) pickle.dump(vectorizer, open( "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Vectorizers/" "EFVectorizer", "wb+")) pickle.dump(selector, open( "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Selectors/" "EFSelector", "wb+"))
# Evaluates the pickled #HealthAndSanitation model against the tagged loans
# CSV: for every non-Health loan the model scores at >= 0.6, count whether the
# loan really carries the tag, and print the links of those that do not.
def _load_pickle(path):
    """Return the object pickled at *path*, closing the file afterwards."""
    # NOTE: unpickling can execute arbitrary code; only load artefacts that
    # were produced by the project's own training scripts.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Counters are initialised here so this section does not depend on earlier
# code: `total` = loans predicted positive, `correct` = those actually tagged.
correct = 0
total = 0

# newline="" is the csv module's documented way to open files it parses.
with open(
        "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/"
        "loans_assigned_for_tagging_with_descriptions_new.csv",
        newline="") as loans_file:
    loans = csv.DictReader(loans_file)

    forest = _load_pickle(
        "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Forests/"
        "HASForest")
    print(forest.best_params_)
    vectorizer = _load_pickle(
        "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Vectorizers/"
        "HASVectorizer")
    selector = _load_pickle(
        "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Selectors/"
        "HASSelector")

    for loan in tqdm(loans):
        # Loans in the Health sector are left out of the evaluation.
        if loan["Sector"] == "Health":
            continue

        text = Analysis.modify(loan["Use"])
        if text is None:
            # Nothing to score for this loan.
            continue

        features = selector.transform(vectorizer.transform([text])).toarray()
        prediction = forest.predict_proba(features)
        if prediction[0][1] < .6:  # 0.6
            continue

        if "#HealthAndSanitation" in loan["Tags"]:
            correct += 1
        else:
            # Predicted positive but not tagged: print it for manual review.
            print(total, loan["Raw Link"])
        total += 1

print(correct, total)
# Skip the ratio when no loan passed the threshold instead of raising
# ZeroDivisionError.
if total:
    print(correct / total)
# Builds 0/1 labels for the #Schooling tag from the loans CSV, hands them to
# Analysis.initialize, and pickles the three objects it returns.
import csv
import pickle

from Other import Analysis


def _save_pickle(obj, path):
    """Pickle *obj* to *path*, closing the file so the data is flushed."""
    with open(path, "wb+") as handle:
        pickle.dump(obj, handle)


labels = []    # one entry per kept row: 1 if tagged #Schooling, else 0
toremove = []  # indices of skipped rows, forwarded to Analysis.initialize

# newline="" is the csv module's documented way to open files it parses, so
# line breaks embedded in quoted fields are handled by the reader itself.
with open(
        "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/"
        "loans_assigned_for_tagging_with_descriptions_combined3.csv",
        newline="") as loans_file:
    loans = csv.DictReader(loans_file)
    for i, loan in enumerate(loans):
        # Education-sector rows receive no label; only their index is recorded.
        if loan["Sector"] == "Education":
            toremove.append(i)
            continue
        labels.append(1 if "#Schooling" in loan["Tags"] else 0)

# NOTE(review): Analysis.initialize is defined elsewhere; presumably it fits
# the model on the named dataset's "Description" column -- confirm.
forest, vectorizer, selector = Analysis.initialize(
    "loans_assigned_for_tagging_with_descriptions_combined3", labels,
    "Description", toremove, n_estimators=25)

_save_pickle(
    forest,
    "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Forests/ScForest")
_save_pickle(
    vectorizer,
    "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Vectorizers/"
    "ScVectorizer")
_save_pickle(
    selector,
    "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Selectors/"
    "ScSelector")
# Writes SBagOfWords.csv (id, description, 0/1 value for the #Single /
# #SingleParent tags), then calls Analysis.initialize("S") and pickles the
# three objects it returns.
import csv
import pickle

from Other import Analysis


def _save_pickle(obj, path):
    """Pickle *obj* to *path*, closing the file so the data is flushed."""
    with open(path, "wb+") as handle:
        pickle.dump(obj, handle)


# Not used by the statements below; kept in case later code relies on it.
ids = []

# Both files are opened with newline="" as the csv module documents, and are
# closed (hence flushed) before Analysis.initialize runs.
with open(
        "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/BagOfWords/"
        "SBagOfWords.csv", "w+", newline="") as bag_file:
    writer = csv.writer(bag_file)
    writer.writerow(["id", "description", "value"])
    with open(
            "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/"
            "loans_assigned_for_tagging_with_descriptions.csv",
            newline="") as loans_file:
        loans = csv.DictReader(loans_file)
        for loan in loans:
            tags = loan["Tags"]
            value = 1 if "#Single" in tags or "#SingleParent" in tags else 0
            writer.writerow([loan["Loan ID"], loan["Description"], value])

# NOTE(review): presumably Analysis.initialize("S") reads the SBagOfWords.csv
# written above, which is why that file must be fully flushed first -- confirm.
forest, vectorizer, selector = Analysis.initialize("S")

_save_pickle(
    forest,
    "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Forests/SForest")
_save_pickle(
    vectorizer,
    "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Vectorizers/"
    "SVectorizer")
_save_pickle(
    selector,
    "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Selectors/"
    "SSelector")
# Writes PBagOfWords.csv (id, description, 0/1 value for the #Parent /
# #SingleParent tags), then calls Analysis.initialize("P", [250, 2]) and
# pickles the three objects it returns.
def _save_pickle(obj, path):
    """Pickle *obj* to *path*, closing the file so the data is flushed."""
    with open(path, "wb+") as handle:
        pickle.dump(obj, handle)


# Not used by the statements below; kept in case later code relies on it.
ids = []

# Both files are opened with newline="" as the csv module documents, and are
# closed (hence flushed) before Analysis.initialize runs.
with open(
        "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/BagOfWords/"
        "PBagOfWords.csv", "w+", newline="") as bag_file:
    writer = csv.writer(bag_file)
    writer.writerow(["id", "description", "value"])
    with open(
            "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/"
            "loans_assigned_for_tagging_with_descriptions.csv",
            newline="") as loans_file:
        loans = csv.DictReader(loans_file)
        for loan in tqdm(loans):
            # Loans whose description yields an age of 50 or more are left
            # out of the bag-of-words file entirely.
            age = GetAge.GetAge(loan["Description"])
            if age and age >= 50:
                continue
            tags = loan["Tags"]
            value = 1 if "#Parent" in tags or "#SingleParent" in tags else 0
            writer.writerow([loan["Loan ID"], loan["Description"], value])

# NOTE(review): presumably Analysis.initialize("P", ...) reads the
# PBagOfWords.csv written above, which is why that file must be fully flushed
# first; the meaning of [250, 2] is defined by Analysis -- confirm both.
forest, vectorizer, selector = Analysis.initialize("P", [250, 2])

_save_pickle(
    forest,
    "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Forests/PForest")
_save_pickle(
    vectorizer,
    "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Vectorizers/"
    "PVectorizer")
_save_pickle(
    selector,
    "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Selectors/"
    "PSelector")
loans = csv.DictReader(open( "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/loans_assigned_for_tagging_with_descriptions.csv")) pforest = pickle.load(open( "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Forests/SForest", "rb")) pvectorizer = pickle.load(open( "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Vectorizers/SVectorizer", "rb")) pselector = pickle.load(open( "/Users/thomaswoodside/PycharmProjects/AutoTag/DataFiles/Selectors/SSelector", "rb")) for loan in tqdm(loans): modified = [Analysis.modify(loan["Description"])] if modified != [None]: pmodified = pvectorizer.transform(modified) pmodified_and_selected = pselector.transform(pmodified).toarray() pprediction = pforest.predict_proba(pmodified_and_selected) else: continue if pprediction[0][1] < 0.5: continue if "#Single" in loan["Tags"] or "#SingleParent" in loan["Tags"]: correct += 1 else: print(loan["Raw Link"]) total += 1 print(correct, total) try: