def __init__(self, listing, coordMat, npriceList):
    """
    Initialize the metric object.

    Input:
        listing: pandas DataFrame, or any other data (e.g. a json list)
            accepted by the pd.DataFrame constructor; anything that is
            not already a DataFrame is converted to one.
        coordMat: array, stored unchanged on the instance.
        npriceList: array, stored unchanged on the instance.

    The listing is passed through reviewTool.remove_noPosts before it is
    stored as self.listing.
    """
    # isinstance instead of an exact type comparison: DataFrame subclasses
    # are accepted as-is rather than being rebuilt.
    if not isinstance(listing, pd.DataFrame):
        listing = pd.DataFrame(listing)

    self.listing = reviewTool.remove_noPosts(listing)
    self.coordMat = coordMat
    self.npriceList = npriceList
import metric

#def main():

### Data Prep ###
# Runs at module level: lists the current working directory and builds the
# legit / scam listings and their metric arrays.
list_files = sorted(os.listdir('./'))

# This assumes that half of the files are legit and half
# are the corresponding scams (in sorted order: legit first, scams second).
# Floor division keeps the index an int on both Python 2 and Python 3;
# with an odd number of files the extra one lands in the second half.
mid_idx = len(list_files) // 2
legit = reviewTool.patch_listings(list_files[:mid_idx])
scams = reviewTool.patch_listings(list_files[mid_idx:])

# remove duplicates (the helper is handed a one-element list)
legit_clean = reviewTool.remove_duplicates([legit])
scams_clean = reviewTool.remove_duplicates([scams])

# remove noPosts
legit_clean = reviewTool.remove_noPosts(legit_clean)
scams_clean = reviewTool.remove_noPosts(scams_clean)

# reprocess phone numbers (just in case)
legit_clean = reviewTool.reprocess_phoneNumber_flag(legit_clean)
scams_clean = reviewTool.reprocess_phoneNumber_flag(scams_clean)
print("<> Clean-up process done!")

# get the normalized price and coordinate matrix
# (computed from the legit listings only)
nprice, coordMat = reviewTool.get_nprice_and_coordMat(legit_clean)
print("<> Got normalized prices and coordinates")

# Get the training metrics
# Both Metric objects are given the same nprice / coordMat derived from
# the legit listings above.
legit_metric = metric.Metric(legit_clean, coordMat, nprice)
legit_farr = legit_metric.format_metrics()
scams_metric = metric.Metric(scams_clean, coordMat, nprice)
scams_farr = scams_metric.format_metrics()
print("<> Got the metrics")

### Data Training ###