Example #1
0
    def __init__(self, listing, coordMat, npriceList):
        """
        Initialize the metric object

        Input:  listing (pd dataframe, or any data the pd.DataFrame
                constructor accepts, e.g. a json list),
                coordMat and npriceList (arrays)

        The listing is converted to a DataFrame when it is not one already,
        then passed through reviewTool.remove_noPosts before being stored.
        coordMat and npriceList are stored unchanged.
        """
        # isinstance (instead of an exact type comparison) also accepts
        # DataFrame subclasses as-is rather than rebuilding them.
        if not isinstance(listing, pd.DataFrame):
            listing = pd.DataFrame(listing)
        self.listing    = reviewTool.remove_noPosts(listing)
        self.coordMat   = coordMat
        self.npriceList = npriceList
Example #2
0
import metric

#def main():
### Data Prep ###
# Every entry of the current working directory is treated as an input file.
list_files = os.listdir('./')
list_files.sort()
# This assumes that half of the files are legit and half
# are the corresponding scams: after sorting, the first half of the names
# is taken as legit and the second half as scams (with an odd count the
# extra file ends up in the scams half).
# Floor division keeps mid_idx an integer; a float is not a valid slice
# index when true division is in effect.
mid_idx = len(list_files) // 2
legit = reviewTool.patch_listings(list_files[:mid_idx])
scams = reviewTool.patch_listings(list_files[mid_idx:])
# remove duplicates
legit_clean = reviewTool.remove_duplicates([legit])
scams_clean = reviewTool.remove_duplicates([scams])
# remove noPosts
legit_clean = reviewTool.remove_noPosts(legit_clean)
scams_clean = reviewTool.remove_noPosts(scams_clean)
# reprocess phone numbers (just in case)
legit_clean = reviewTool.reprocess_phoneNumber_flag(legit_clean)
scams_clean = reviewTool.reprocess_phoneNumber_flag(scams_clean)
# Single-argument print(...) emits the same text whether print is a
# statement or a function.
print("<> Clean-up process done!")
# get the normalized price and coordinate matrix
# NOTE(review): both are derived from the legit listings only and are then
# reused for the scam metrics below -- confirm this is intended.
nprice, coordMat = reviewTool.get_nprice_and_coordMat(legit_clean)
print("<> Got normalized prices and coordinates")
# Get the training metrics
legit_metric = metric.Metric(legit_clean, coordMat, nprice)
legit_farr   = legit_metric.format_metrics()
scams_metric = metric.Metric(scams_clean, coordMat, nprice)
scams_farr   = scams_metric.format_metrics()
print("<> Got the metrics")
### Data Training ###