def parse_topologies(url, min_len=0, max_len=99, local=True):
    """Count how often each topology occurs among the lines of a text file.

    Every line read from ``url`` is passed through ``trim_newline``; lines
    whose resulting length lies within ``[min_len, max_len]`` are mapped to
    a topology by ``get_topology`` and tallied.

    Args:
        url: Path handed to ``open``.
        min_len: Smallest line length (inclusive) that is counted.
        max_len: Largest line length (inclusive) that is counted.
        local: When false, the running tally is pushed to MongoDB through
            ``mongodb_obj.update`` and then cleared every time it holds
            more than 10000 distinct topologies.

    Returns:
        dict mapping topology -> occurrence count. When ``local`` is false
        it only holds what was counted since the last flush.
    """
    topologies = {}
    mongodb_obj = None
    if not local:
        creds = credentials()
        mongodb_obj = mongodb.mongodb(creds[0], creds[1], 'words', 'topologies')

    # The context manager guarantees the file is closed, even on errors.
    with open(url, 'r', errors='strict') as fp:
        while True:
            try:
                line = fp.readline()
            except ValueError:
                # Undecodable input (UnicodeDecodeError is a ValueError):
                # skip it and keep reading, rather than counting the
                # previously read line a second time.
                continue
            if not line:
                # readline() returns '' only at end of file.
                break

            line = trim_newline(line)
            if not min_len <= len(line) <= max_len:
                continue

            topology = get_topology(line)
            topologies[topology] = topologies.get(topology, 0) + 1

            # Bound memory use in remote mode by flushing large tallies.
            if not local and len(topologies) > 10000:
                mongodb_obj.update(topologies)
                topologies.clear()

    return topologies
def __init__(self, parent=None):
    """Build the main window, its helper dialogs and its backing services.

    Creates the MongoDB and MySQL helper objects, the analysis thread and
    the Twitter scraper (configured from ``sys.argv[1]``), then the
    "Results", "About" and "Notice" dialogs. The main window and the
    results dialog are shown before the constructor returns.

    Args:
        parent: Optional parent widget, forwarded to ``QMainWindow``.

    Raises:
        IndexError: If no command-line argument was supplied.
    """
    # Storage helpers.
    self.mongodb = mongodb.mongodb()
    self.sql = sql.mysql_class()

    # Forward ``parent`` so the argument is honoured instead of dropped.
    QMainWindow.__init__(self, parent)
    self.thread = AThread(self.analysis_cmds)

    # Twitter scraper, configured from the first command-line argument.
    keys = sys.argv[1]
    self.twitter_scrapper = twitter_scrapper(keys)

    # Results dialog.
    self.result_ui = QDialog()
    self.result_ui.setWindowTitle("Results")
    self.result_ui.setGeometry(1200, 300, 100, 50)
    self.result_ui.resize(300, 300)

    # About dialog.
    self.about_ui = QDialog()
    self.about_ui.setWindowTitle("About")
    self.about_ui.resize(500, 325)

    # Notice dialog.
    self.notice_ui = QDialog()
    self.notice_ui.setWindowTitle("Notice")
    self.notice_ui.resize(400, 100)

    # ``self.w`` is an alias for the main window itself.
    self.w = self
    # Deliver the worker thread's signal to post_results.
    self.w.connect(self.thread, self.thread.signal, self.post_results)

    # Main window size and title.
    self.w.resize(320, 240)
    self.w.setWindowTitle("Flop or Not")

    # Populate the window.
    self.setup_ui_elements()
    self.setup_textbox_and_buttons()

    # Show the main window and the results dialog.
    self.w.show()
    self.result_ui.show()

    # Gradient and palette used for the window colour.
    self.gradient = QLinearGradient(0, 0, 0, 400)
    self.p = QPalette()
    self.set_color(255, 255, 255)
def metadata_exctract(self, db):
    """Start a metadata-extraction worker for each record not yet marked.

    Iterates the ``new_domaines`` collection of database ``db`` through a
    ``mongodb`` wrapper built for ``localhost:27017``. Every record's URL
    is printed; a record without a ``meta`` key is tagged in memory and a
    ``metadataextract`` worker is started for it.

    Args:
        db: Database name passed to the ``mongodb`` wrapper and the workers.
    """
    caller = threading.current_thread()
    print("########## Meta Data IP ##########")
    store = mongodb.mongodb('localhost', 27017, db)

    for position, record in enumerate(store.selectall('new_domaines'), 1):
        url = record['url']
        domaine_value = record['domaine']
        print(url)

        if 'meta' in record:
            continue

        record['meta'] = 'ok'
        worker = metadataextract.metadataextract(
            'harvesting/metaextract.js', db, domaine_value, url)
        worker.start()

        # NOTE(review): throttle assumed to belong to this branch - confirm.
        if position % 30:
            continue
        # Every 30th record: give the other threads up to 2 s each.
        for other in threading.enumerate():
            if other is not caller:
                other.join(2)
def metadata_exctract(self, db):
    """Launch metadata extraction for every domain lacking a ``meta`` key.

    Reads the ``new_domaines`` collection of database ``db`` via a
    ``mongodb`` wrapper created for ``localhost:27017``, prints each
    record's URL and starts one ``metadataextract`` worker per record that
    has no ``meta`` key (the key is set on the in-memory record only).

    Args:
        db: Database name given to the ``mongodb`` wrapper and the workers.
    """
    this_thread = threading.current_thread()
    print("########## Meta Data IP ##########")
    database = mongodb.mongodb('localhost', 27017, db)

    seen = 0
    for entry in database.selectall('new_domaines'):
        seen += 1
        url = entry['url']
        domaine_value = entry['domaine']
        print(url)

        if 'meta' not in entry:
            entry['meta'] = 'ok'
            extractor = metadataextract.metadataextract(
                'harvesting/metaextract.js', db, domaine_value, url)
            extractor.start()

            # NOTE(review): throttle assumed to sit inside this branch - confirm.
            if seen % 30 == 0:
                # Wait at most 2 s on each thread other than this one.
                pending = [t for t in threading.enumerate()
                           if t is not this_thread]
                for thread in pending:
                    thread.join(2)
def mongodb_via_postman():
    """Run one MongoDB operation described by the JSON body of a POST request.

    JSON keys required by every operation::

        {
            "operation": "create" | "insert" | "update" | "delete" | "download",
            "url": connection url,
            "db": db name,
            "collection_name": collection name
        }

    Additional keys per operation:
        insert:  "record" - a dict for a single record, a list of dicts
                 for many records
        update:  "set" - key=value pairs of columns and values to update,
                 "where" - condition
        delete:  "where" - condition

    Returns:
        A JSON response carrying a status message. An unsupported
        operation yields status 400 and a non-POST request status 405,
        instead of failing without a valid response.
    """
    # NOTE(review): the client chooses the connection URL, database and
    # collection; confirm this endpoint is never exposed to untrusted callers.
    if request.method != 'POST':
        return jsonify("only POST requests are supported"), 405

    payload = request.json
    operation = payload['operation']
    url = payload['url']
    db = payload['db']
    ob = mongodb.mongodb(url, db)
    collection_name = payload['collection_name']

    if operation == 'create':
        ob.create_collection(collection_name)
        msg = "Table created"
    elif operation == 'insert':
        record = payload['record']
        ob.insert(collection_name, record)
        msg = "data inserted"
    elif operation == 'update':
        # Named ``new_values`` so the builtin ``set`` is not shadowed.
        new_values = payload['set']
        where = payload['where']
        ob.update(collection_name, new_values, where)
        msg = "data updated"
    elif operation == 'delete':
        where = payload['where']
        ob.delete(collection_name, where)
        msg = "data deleted"
    elif operation == 'download':
        link = ob.download(collection_name)
        msg = "you can download data using this link : http://127.0.0.1:5000/" + link
    else:
        # Previously ``msg`` stayed unbound here and the view crashed.
        return jsonify("unsupported operation: {}".format(operation)), 400

    return jsonify(msg)
def fetch(klass):
    """Return the first stored instance of *klass*, or None if none exist.

    The lookup is delegated to the object returned by ``mongodb()``. It is
    mainly useful for loading root or singleton objects kept in the
    database.
    """
    database = mongodb()
    return database.fetch(klass)
def _mongodb_default(self):
    """Return a new object obtained by calling ``mongodb()``.

    NOTE(review): presumably the default-value hook for a ``mongodb``
    attribute - confirm against the enclosing class.
    """
    default_value = mongodb()
    return default_value
# Script fragment: walk every record of the 'new_domaines' collection in
# the database named by the first command-line argument.
import sys
from harvesting import white_list
import mongodb

# Database name comes from the command line.
db=sys.argv[1]
# mongodb wrapper built for localhost:27017 and that database.
mdb=mongodb.mongodb('localhost',27017,db)
for domaine in mdb.selectall('new_domaines'):
    # Value stored under the record's 'domaine' key.
    dm=domaine['domaine']
password = config.get("account1", "password") seed=config.get("seed_url", "seed") db=config.get('database', 'db') collection_info=config.get('collection', 'table_info') collection_relation=config.get('collection', 'table_relation') method=Fetcher() method.login(email, password) url=seed return url, method, db, collection_info, collection_relation if __name__ == '__main__': url, method, db, collection_info, collection_relation=config_login() ip='127.0.0.1' port=27017 mongodb=mongodb(ip,port) conn=mongodb.get_conn() complete=conn.weibo.completes complete_user=complete.find()#建立complete数据库 users=set() for every_complete_user in complete_user: users.add(every_complete_user['uid'][1]) #print users relation=conn.weibo.user_relation#建立relation数据库 posts=relation.find() count=posts.count() print count if users.__len__()==0:
# Script fragment: walk every record of the "new_domaines" collection in
# the database named by the first command-line argument.
import sys
from harvesting import white_list
import mongodb

# Database name comes from the command line.
db = sys.argv[1]
# mongodb wrapper built for localhost:27017 and that database.
mdb = mongodb.mongodb("localhost", 27017, db)
for domaine in mdb.selectall("new_domaines"):
    # Value stored under the record's "domaine" key.
    dm = domaine["domaine"]