def fix_olx_model():
    """Count and print how many 'olx.sa.com' products have no model.

    Queries ``db.products`` for documents whose ``source`` is
    ``'olx.sa.com'``, counts those whose ``model`` value is ``None`` and
    prints that count followed by ``"Done"``.

    Despite its name, this function only reports; it does not write anything
    back to the database.

    Each matching document is read with ``product['model']``, so it is
    expected to carry a ``model`` key.
    """
    db = dao.get_db()
    products = db.products.find({'source': 'olx.sa.com'})

    # Identity comparison is the correct test for None.
    none_count = sum(1 for product in products if product['model'] is None)

    print(str(none_count))
    print("Done")
def dump_models():
    """Export every document of ``db.models`` to ``models.csv``.

    Materializes the full result of ``db.models.find({})`` into a list and
    hands it to ``util.save_list_as_csv`` together with the file name and the
    log directory, then prints ``"Done"``.
    """
    cursor = dao.get_db().models.find({})
    all_models = list(cursor)
    util.save_list_as_csv(
        all_models,
        'models.csv',
        '/home/bebe/0yasser/shofle/source/shofle_reader/log/',
    )
    print("Done")
def update_models():
    """Load models from ``models.csv`` and insert them into ``db.models``.

    Reads the rows with ``util.read_csv_as_list_``, then for each row:
    marks it active, strips surrounding whitespace from ``name`` and
    ``name_ar``, and inserts it into the ``models`` collection. Prints
    ``"Done"`` when finished.

    Each row is modified in place before being inserted and must provide
    ``name`` and ``name_ar`` values that support ``.strip()``.
    """
    db = dao.get_db()
    models = util.read_csv_as_list_(
        'models.csv', '/home/bebe/0yasser/shofle/source/shofle_reader/log/')

    for model in models:
        # Was bool('true'): any non-empty string is truthy, so that always
        # evaluated to True (bool('false') would too). Use the literal.
        model['active'] = True
        model['name'] = model['name'].strip()
        model['name_ar'] = model['name_ar'].strip()
        # NOTE(review): if `db` is a PyMongo database, Collection.insert is
        # deprecated since PyMongo 3.0 and removed in 4.0 -- consider
        # insert_one() after confirming the installed driver version.
        db.models.insert(model)

    print("Done")
def get_ads_dataset(filters, fields_to_project, target_field):
    """Build an array of product ads with the target field in column 0.

    Queries ``db.products`` with ``filters`` and a fixed projection. For each
    returned document, the keys that are also in ``fields_to_project`` are
    kept in the order the document yields them. The target value is stored
    as-is; every other value is replaced by ``hash(value)``. Finally the
    target column is swapped with column 0.

    Args:
        filters: Query document passed as the first argument to
            ``db.products.find``, e.g.
            ``{"body_type": {"$ne": ""}, "power": {"$ne": 0}}``.
        fields_to_project: Container of field names to keep. Only names that
            also appear in the fixed projection below can show up in a row.
        target_field: Name of the field whose raw value is kept and moved to
            column 0.

    Returns:
        A ``numpy`` array with one row per document and the target in column
        0; an empty array when the query yields no documents; ``None`` when
        any error occurs (the error is reported through ``util.logerr``).
    """
    db = dao.get_db()
    try:
        projection = {
            "brand": 1,
            "model": 1,
            "year": 1,
            "body_type": 1,
            "engine": 1,
            "power": 1,
            "fuel": 1,
            "transmission": 1,
            "location": 1,
            "specs": 1,
            "_id": 0,
        }

        rows = []
        for ad in db.products.find(filters, projection):
            row = []
            # Recomputed for every document; the value from the last
            # document is the one used for the column swap below.
            target_field_index = 0
            for key in ad:
                if key not in fields_to_project:
                    continue
                if key == target_field:
                    target_field_index = len(row)
                    row.append(ad[key])
                else:
                    # NOTE: hash() of str/bytes is salted per process unless
                    # PYTHONHASHSEED is fixed, so these codes are only
                    # comparable within a single run. Unhashable values
                    # raise TypeError, which is handled below.
                    row.append(hash(ad[key]))
            rows.append(row)

        if not rows:
            # Nothing matched: there is no column to swap.
            return np.array(rows)

        ads = np.array(rows)

        # Swap columns so that y (the target) is column 0. Fancy indexing on
        # the right-hand side yields a copy, so the swap is safe.
        ads[:, [0, target_field_index]] = ads[:, [target_field_index, 0]]

        return ads
    except Exception as e:
        # Was `except (Exception, e):`, which raised NameError on `e`
        # instead of logging the original error.
        util.logerr(str(e))
        return None