def start_requests(self):
    AppProducts = self.getURLS()
    for items in AppProducts:
        allProductPriceDict = {}
        product_id = items[0]
        brand = items[1]
        productName = items[2]
        urlList = items[3]
        priceList = []
        update_time = datetime.datetime.now().strftime("%y-%m-%d-%H-%M")
        current_dict = DBOperations.getCollectionProduct(self.priceClient, self.PriceCollection, product_id)
        for url in urlList:
            # url = "http://www.amazon.in/dp/B00MPDR6PW"
            print url
            try:
                response = requests.get(url)
                priceDict = self.parse(response, current_dict)
                priceDict = self.checkForNonZeroPrice(priceDict, current_dict)
                priceList.append(priceDict)
            except Exception, err:
                print traceback.format_exc(), "Error", url
        print "Extracting Price for Product: ", product_id
        # self.parse( meta = {'outputFilePath': outputFilePath,'brand':brand,'productName':productName,'product_id':product_id, 'snapDealMatch':snapDealMatch, 'amazonMatch':amazonMatch})
        allProductPriceDict['product_id'] = product_id
        allProductPriceDict['brand'] = brand
        allProductPriceDict['productName'] = productName
        allProductPriceDict['priceList'] = priceList
        allProductPriceDict["update_time"] = update_time
        DBOperations.mongoSaveDocument(allProductPriceDict, self.PriceCollection, self.priceClient, 'product_id', False)
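# The helper self.checkForNonZeroPrice is referenced above but not defined in this
# section. Below is a minimal sketch of what it likely does, assuming it falls back
# to the previously stored price when the freshly scraped price is missing or zero.
# The field names 'price', 'site', and 'priceList' are assumptions for illustration,
# not taken from the original code.
def checkForNonZeroPrice(self, priceDict, current_dict):
    # If the scrape returned no usable price, reuse the last stored value for the same site.
    if not priceDict.get('price'):
        if current_dict:
            for previous in current_dict.get('priceList', []):
                if previous.get('site') == priceDict.get('site') and previous.get('price'):
                    priceDict['price'] = previous['price']
                    break
    return priceDict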
def upDateProductRecommendation():
    ProductRecommendationClient = DBOperations.getMongoDBClient(ProductRecommendationDBName)
    fileList = getProductRecommendationFile(ProductRecommendationPath)
    for file in fileList:
        for row in __getListFromCSV(file):
            itemDict = {}
            product_id, category, recommendedProducts = getProductElementsFromMasterFileRow(row)
            itemDict['product_id'] = product_id
            itemDict['category'] = category
            itemDict['recommendedProducts'] = recommendedProducts
            if recommendedProducts != []:
                DBOperations.mongoSaveDocument(itemDict, "allComp", ProductRecommendationClient, 'product_id', False)
def getURLS(self):
    inputFile = self.ProductMasterFilePath
    fileObj = open(inputFile)
    ProductList = []
    reader = csv.reader(fileObj)
    DBMasterClient = DBOperations.getMongoDBClient(self.ProductMasterDBName)
    masterCursor = DBOperations.getCollectionCursorObject(DBMasterClient, "allproducts")
    for items in masterCursor:
        product_id = items['product_id']
        brand = items['brand']
        category = items['category']
        product_name = items['product_name']
        product_urlList = items['product_urlList']
        url = self.__getOrderedURL(product_urlList)
        ProductList.append([product_id, brand, product_name, url, category])
    return ProductList
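# __getOrderedURL is used above and in the module-level getURLS variants below, but
# its body is not part of this section. A minimal sketch, assuming it simply returns
# a product's URLs reordered by a preferred-site sequence; the preference order below
# is an assumption for illustration only.
def __getOrderedURL(self, product_urlList):
    preferredSites = ["flipkart", "snapdeal", "amazon"]  # assumed preference order
    orderedList = []
    for site in preferredSites:
        for url in product_urlList:
            if site in url and url not in orderedList:
                orderedList.append(url)
    # keep any remaining URLs in their original order
    for url in product_urlList:
        if url not in orderedList:
            orderedList.append(url)
    return orderedList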
def upDateProductRecommendation():
    ProductRecommendationClient = DBOperations.getMongoDBClient(ProductRecommendationDBName)
    fileList = getProductRecommendationFile(ProductRecommendationPath)
    for file in fileList:
        for row in __getListFromCSV(file):
            itemDict = {}
            product_id, category, recommendedProducts = getProductElementsFromMasterFileRow(row)
            itemDict['product_id'] = product_id
            itemDict['category'] = category
            itemDict['recommendedProducts'] = recommendedProducts
            if recommendedProducts != []:
                DBOperations.mongoSaveDocument(itemDict, "allReco", ProductRecommendationClient, 'product_id', False)
def parse(self, response, url, brand, productName, product_id, snapDealMatch, amazonMatch):
    productJSON = {}
    if ("flipkart" in url):
        flipKartScrapper = FlipKartScrapper()
        productJSON = flipKartScrapper.downloadProductDetails(response.content, productName, brand)
    if ("snapdeal" in url):
        snapdealScrapper = SnapDealScrapper()
        productJSON = snapdealScrapper.downloadProductDetails(response.content, productName, brand, snapDealMatch)
    if ("amazon" in url):
        amazonScrapper = AmazonScrapper()
        productJSON = amazonScrapper.downloadProductDetails(response.content, productName, brand, amazonMatch)
    # self.saveOutPut(productJSON, outputFilePath)
    productJSON['product_id'] = product_id
    productJSON['spec_url'] = response.url
    # print productJSON
    DBOperations.mongoSaveDocument(productJSON, self.SpecificationCollection, self.specificationClient, "product_id", False)
def getURLS(self):
    productMasterClient = DBOperations.getMongoDBClient(self.ProductMasterDBName)
    cursor = productMasterClient.allproducts.find()
    ProductList = []
    for row in cursor:
        product_id = row['product_id']
        brand = row['brand']
        product_name = row['product_name']
        product_urlList = row['product_urlList']
        ProductList.append([product_id, brand, product_name, product_urlList])
    return ProductList
def getURLS():
    productMasterClient = DBOperations.getMongoDBClient(ProductMasterDBName)
    cursor = productMasterClient.allproducts.find()
    ProductList = []
    for row in cursor:
        product_id = row["product_id"]
        brand = row["brand"]
        product_name = row["product_name"]
        product_urlList = row["product_urlList"]
        url = __getOrderedURL(product_urlList)
        ProductList.append([product_id, url])
    return ProductList
class SpecificationScrapper():
    specificationClient = DBOperations.getMongoDBClient("ProductSpecification")
    ProductMasterDBName = "Productmaster"
    SpecificationCollection = "allproducts"
    ProductMasterFilePath = "/home/" + getpass.getuser() + "/BazaarfundaSrapperFiles/ProductMaster/MasterFile_Overall.csv"

    def start_requests(self):
        AppProducts = self.getURLS()
        # AppProducts = [["D11111","HP","HP-1", "http://www.snapdeal.com/product/apple-imac-mf886hna-desktop-4th/633082501991", "desktop"]]
        allCategory = [category[4] for category in AppProducts]
        allCategory = list(set(allCategory))
        snapDealSpecificationMatchDict = {}
        amazonSpecificationMatchDict = {}
        for cat in allCategory:
            if cat != "Category" and cat != '':
                snapDealSpecificationMatchDict[cat] = self.createMatchDict('./SpecificationMatch/' + cat + '/SpecificationMatchSnapDeal.csv')
                amazonSpecificationMatchDict[cat] = self.createMatchDict('./SpecificationMatch/' + cat + '/SpecificationMatchAmazon.csv')
        for items in AppProducts:
            product_id = items[0]
            brand = items[1]
            productName = items[2]
            url = items[3]
            category = items[4]
            try:
                snapDealMatch = snapDealSpecificationMatchDict[category]
                amazonMatch = amazonSpecificationMatchDict[category]
                # outputFilePath = self.outputFilePath + productName.replace("/","") + ".json"
                outputFilePath = ""
                print product_id
                if not DBOperations.isIdPresent(self.specificationClient, self.SpecificationCollection, "product_id", product_id):
                    try:
                        response = requests.get(url, timeout=5)
                        self.parse(response, url, brand, productName, product_id, snapDealMatch, amazonMatch)
                    except Exception, err:
                        print traceback.format_exc(), "Error", url, items
                # self.parse( meta = {'outputFilePath': outputFilePath,'brand':brand,'productName':productName,'product_id':product_id, 'snapDealMatch':snapDealMatch, 'amazonMatch':amazonMatch})
            except Exception, err:
                print traceback.format_exc()
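# createMatchDict is called above but not defined in this section. A minimal sketch,
# assuming each SpecificationMatch CSV maps a site-specific specification label
# (first column) to the canonical label used in the product JSON (second column);
# the column layout is an assumption, not taken from the original code.
import csv

def createMatchDict(self, matchFilePath):
    matchDict = {}
    with open(matchFilePath) as matchFile:
        for row in csv.reader(matchFile):
            if len(row) >= 2:
                matchDict[row[0].strip()] = row[1].strip()
    return matchDict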
def execute(trial=False):
    startTime = datetime.datetime.now()

    # Set up the database connection.
    client = dml.pymongo.MongoClient()
    repo = client.repo
    repo.authenticate('ojhamb_runtongy_sgullett_zybu', 'ojhamb_runtongy_sgullett_zybu')
    repo.dropCollection("Tweets")
    repo.createCollection("Tweets")

    # Set up the criteria for selecting 10000 tweets.
    tweetCriteria = got.manager.TweetCriteria().setNear('Amman').setWithin('150mi').setMaxTweets(10000)
    tweets = got.manager.TweetManager.getTweets(tweetCriteria)

    # Project a list of tweets.
    listTweets = dbo.project(tweets, lambda t: (t.id, t.date.strftime("%Y-%m-%d %H:%M"), t.retweets, t.favorites, t.text, t.geo, t.hashtags))

    # Select the tweets which have a geo location.
    tweetsWithIDs = dbo.select(listTweets, lambda x: x[5] != '')
    print(tweetsWithIDs)

    repo.dropCollection("Tweets")
    repo.createCollection("Tweets")
    repo['ojhamb_runtongy_sgullett_zybu.Tweets'].insert_many(tweetsWithIDs)
    repo['ojhamb_runtongy_sgullett_zybu.Tweets'].metadata({'complete': True})
    print(repo['ojhamb_runtongy_sgullett_zybu.Tweets'].metadata())
    repo.logout()

    endTime = datetime.datetime.now()
    return {"start": startTime, "end": endTime}
def submit_job(min, max, N, O, res_up, res_dw, path):
    # node string
    node_range_str = "min:%d max:%d" % (min, max)
    # N and O string
    n_str = "N:" + ",".join([str(_) for _ in N])
    o_str = "O:" + ",".join([str(_) for _ in O])
    # res string
    resup_str = "res_up:" + str(res_up)
    resdown_str = "res_dw:" + str(res_dw)
    # horovod command string
    path_str = "path:" + path
    jobString = " ".join([node_range_str, n_str, o_str, resup_str, resdown_str, path_str])
    return DBOperations.submit_job_2_DBQueue(DB_path(), jobString)
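# Example: submit_job builds a single space-separated job string before handing it
# to DBOperations.submit_job_2_DBQueue. With illustrative arguments (the values
# themselves are made up), a call such as
#
#   submit_job(2, 8, [1, 2, 4], [0, 1], 1.5, 0.5, "/tmp/job")
#
# enqueues the jobString:
#
#   "min:2 max:8 N:1,2,4 O:0,1 res_up:1.5 res_dw:0.5 path:/tmp/job"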
def get_a_job_from_DB():
    if get_job_queue_len() > 0:
        return DBOperations.get_Job_from_DBQueue(DB_path())
    else:
        return None
es = EarlyStopping('val_loss', mode="min", patience=10, verbose=1)

model = Model([cust_input, food_input], out)
model.compile('adam', 'mean_squared_error')

if os.path.exists('latest1.h5'):
    model = load_model('latest1.h5')
else:
    history = model.fit([train.userId, train.foodId], train.rating,
                        epochs=100, verbose=1, validation_split=0.3, callbacks=[es])
    model.save('latest1.h5')
    plt.plot(history.history['val_loss'], label="Validation Loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.plot(history.history['loss'], label="Training Loss")
    plt.legend()
    plt.show()

model.evaluate([test.userId, test.foodId], test.rating)

foodData = np.array(list(set(df.foodId)))
user = np.array([63 for i in range(len(foodData))])
predictions = model.predict([user, foodData])
predictions = np.array([a[0] for a in predictions])
food_ids = (-predictions).argsort()[:10]

eaten = db.getEatenFoodsOfUser(63)
new_foods = db.getFoodsFromFoodIDs(food_ids)
recomms = [food for food in new_foods if food not in eaten][:5]
indexes = [id_ for id_, food in enumerate(new_foods) if food not in eaten][:5]
print(eaten)
print()
print(recomms)
print(predictions[indexes])
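# cust_input, food_input, and out are used above but would have been defined before
# that block; their definitions are not part of this section. A minimal sketch of a
# matching embedding model, assuming the usual dot-product collaborative-filtering
# setup. The embedding size and the n_users/n_foods variables are assumptions for
# illustration only.
from keras.layers import Input, Embedding, Flatten, Dot

n_users = df.userId.nunique()  # assumed: df holds the full ratings data
n_foods = df.foodId.nunique()

cust_input = Input(shape=(1,), name="user")
food_input = Input(shape=(1,), name="food")
cust_vec = Flatten()(Embedding(n_users + 1, 16)(cust_input))
food_vec = Flatten()(Embedding(n_foods + 1, 16)(food_input))
out = Dot(axes=1)([cust_vec, food_vec])  # predicted rating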
def get_job_queue_len():
    return DBOperations.get_DB_queue_len(DB_path())
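# Example worker loop using the queue helpers above: drain the DB-backed job queue
# and hand each job string to a processing callback. process_job is a hypothetical
# placeholder, not part of the original code.
def process_pending_jobs(process_job):
    while get_job_queue_len() > 0:
        jobString = get_a_job_from_DB()
        if jobString is None:
            break
        process_job(jobString)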
from Scheduler import *

# Bottle container and error handler (error.py is the router)
app = Bottle()
app.error_handler = error.handler

# simple lists to store system messages, warnings, and errors
system_messages = []
system_warnings = []
system_errors = []

# plans that are running
current_actions = []

# database class
db = DBOperations()

# scheduler
scheduler = Scheduler(db)

original_dir = os.getcwd()


# function to clear the system messages lists
def clear_messages():
    system_messages.clear()
    system_warnings.clear()
    system_errors.clear()


def check_current_builds():