def main(): print "[Main] Welcome to the post parsing script!" print "[Main] Loading JSON data from file...", craigData = loadJSONData(CRAIGSLIST_SCRAPED_FILEPATH) print "done." print "[Main] Parsing JSON data...", craigDb = getCraigDb(craigData) print "done." print "[Main] Connecting to database...", phonesCollection = pymongo.MongoClient().phones_db.phones_collection print "done." print "[Main] Retrieving phone database...", phoneDb = phonesCollection.find() if phoneDb.count() == 0: print "FAIL!" print "The phone database is empty. Please initialize the database before using it." exit() print "done." print "[Main] There are " + str(phonesCollection.count()) + " phones in the database" print "[Main] Beginning to parse posts" i = 1 testTitleList = generateTestPostTileList() goldenList = generateGoldenList() scoreCounter= 0 iterationCounter = 0 for post in craigDb: copyOfDb = phoneDb.clone() # So the cursor doesn't mess up #print "Phone #" + str(i) if(post['title'] in testTitleList): phone = parsePost(post,copyOfDb) scoreCounter = scoreCounter + parsePostTester(goldenList,post,phone) iterationCounter = iterationCounter + 1 #phone = parsePost(post, copyOfDb) #parsePostTester(phone,post) #print "\n" i = i + 1 iterationCounter = iterationCounter*5 print "Total Score-" , scoreCounter,"/",iterationCounter print "[Main] Done script."
def main(args): wipeDatabase = False print "[Main] Welcome to the phone parsing script!" # Check if the --fresh flag was passed for arg in args[1:]: # The first argument is always the python filename if arg == "--fresh": wipeDatabase = True break else: print "[Main] WARNING: Option '"+arg+"' was unrecognized and has been ignored" print "[Main] Loading JSON data from file...", phoneData = loadJSONData(SMARTPHONE_SCRAPED_FILEPATH) print "done." print "[Main] Parsing raw JSON data to generate dictionaries...", phoneDb = getPhoneDb(phoneData) print "done." print "[Main] All JSON data parsed successfully." print "[Main] Opening database connection...", phonesCollection = pymongo.MongoClient().phones_db.phones_collection print "done." print "[Main] Removing possible duplicates...", phoneDb = removeDuplicates(phoneDb) print "done." if wipeDatabase: print "[Main] Fresh flag was passed, wiping out database...", phonesCollection.remove() print "done." print "[Main] Writing collection to database...", phonesCollection.ensure_index( [("manufacturer",pymongo.ASCENDING),("device",pymongo.ASCENDING)], unique=True) phonesCollection.insert(phoneDb) print "done." print "\n[Main] Application has finished successfully."
def main(args): global timeList print "[Main] Welcome to the post parsing script!" wipeDatabase = False # Check flags for arg in args[1:]: if arg == "--fresh": wipeDatabase = True break else: print "[Main] WARNING: Option '"+arg+"' was unrecognized and has been ignored" print "[Main] Loading JSON data from file...", craigData = loadJSONData(CRAIGSLIST_SCRAPED_FILEPATH) print "done." print "[Main] Parsing JSON data...", craigDb = getCraigDb(craigData) print "done." print "[Main] Connecting to database...", phonesCollection = pymongo.MongoClient().phones_db.phones_collection print "done." print "[Main] Retrieving phone database...", phoneDb = phonesCollection.find() if phoneDb.count() == 0: print "FAIL!" print "The phone database is empty. Please initialize the database before using it." exit() print "done." print "[Main] There are " + str(phonesCollection.count()) + " phones in the database" print "[Main] Opening post collection database", postsCollection = pymongo.MongoClient().posts_db.posts_collection print "done." if wipeDatabase: print "[Main] Fresh flag was passed, wiping out database...", postsCollection.remove() print "done." print "[Main] Beginning to parse posts" i = 1 postList = list() for post in craigDb: copyOfDb = phoneDb.clone() # So the cursor doesn't mess up print "Phone #" + str(i) phone = parsePost(post, copyOfDb) print "Subject: " + post['title'] postList.append(phone) print "\n" i = i + 1 if i > 100: break #PROFILE avgTime = sum(timeList)/len(timeList) print "Average time taken: " + str(avgTime) exit() postList = cleanPosts(postList) print "[Main] Writing collection to database...", postsCollection.ensure_index( [("description",pymongo.ASCENDING)], unique=True) postsCollection.insert(postList) print "done." print "[Main] Done script."