def mongo_connect():
    """Open a MongoDB client with default settings and locate the phishtank data.

    Returns:
        tuple: ``(client, collection)`` where ``collection`` is the
        ``phishtank`` collection of the ``DeloitteDemo`` database on that
        client. The client is handed back as well so the caller keeps control
        over it.
    """
    mongo = MC()
    return mongo, mongo.DeloitteDemo.phishtank
def mongo_connect():
    """Create a default MongoDB client and pick out the reddit collection.

    Returns:
        tuple: The client itself, followed by the ``reddit`` collection that
        lives in its ``DeloitteDemo`` database.
    """
    mongo = MC()
    demo_db = mongo.DeloitteDemo
    return mongo, demo_db.reddit
def __init__(self, db='twitterDB', coll='twitter_data', host="localhost",
             port=27017):
    """Configure the ``IO_mongo`` logger and open the MongoDB handles.

    Args:
        db: Name of the database to use.
        coll: Name of the collection inside ``db``.
        host: Host passed to the MongoDB client.
        port: Port passed to the MongoDB client.

    On success ``self.client``, ``self.db`` and ``self.coll`` are set. Any
    exception raised while creating them is logged and swallowed, in which
    case those attributes are left unset.
    """
    self.dbName = db
    self.collName = coll

    appName = 'IO_mongo'
    self.logger = logging.getLogger(appName)
    self.logger.setLevel(logging.DEBUG)

    # logging.getLogger() hands back the same logger object for a given name,
    # so attach handlers only once; otherwise every new instance would add
    # another pair and each message would be emitted several times.
    if not self.logger.handlers:
        logPath = os.getcwd() + '/log'
        # FileHandler does not create missing directories.
        if not os.path.isdir(logPath):
            os.makedirs(logPath)
        fileHandler = logging.FileHandler('{}/{}.log'.format(
            logPath, appName))
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s -%(levelname)s - %(message)s')
        fileHandler.setFormatter(formatter)
        streamHandler = logging.StreamHandler()
        self.logger.addHandler(streamHandler)
        self.logger.addHandler(fileHandler)

    try:
        self.client = MC(host=host, port=port)
        self.db = self.client[db]
        self.coll = self.db[coll]
        self.logger.info(
            'Success connect to mongo database[{}]-coll[{}]'.format(
                db, coll))
    except Exception as e:
        self.logger.error('Connect mongodb error : %s' % e)
def __init__(self, host='localhost', port=27017, db='enterprise',
             collection='enterprise'):
    """Open a MongoDB client and keep handles on one database and collection.

    Args:
        host: Host passed to the MongoDB client.
        port: Port passed to the MongoDB client.
        db: Name of the database to expose as ``self.db``.
        collection: Name of the collection to expose as ``self.collection``.
    """
    self.__conn = MC(host=host, port=port)
    # Look names up with [] instead of getattr(): attribute-style access
    # cannot reach names that start with an underscore and resolves to a real
    # attribute or method when the name happens to clash with one.
    self.db = self.__conn[db]
    self.collection = self.db[collection]
def __init__(self):
    """Set up the MongoDB handles and the triple extractor.

    Stores the client, its ``spider_data`` database, that database's
    ``lawTextTriple`` collection, and a fresh ``TripleExtractor``.
    """
    mongo = MC()
    spider_db = mongo.spider_data

    self.client = mongo
    self.db = spider_db
    self.collect = spider_db.lawTextTriple
    self.extor = TripleExtractor()
def db_connect(self):
    """Connect to the local MongoDB server and return the ``JD_db`` database.

    Returns:
        The database handle obtained as ``client["JD_db"]`` (equivalent to
        ``use JD_db`` in the shell).
    """
    # No credentials are used. The original kept these placeholders for
    # optional user verification, which can be ignored without a password:
    # user = "******"
    # password = ***
    # db.authenticate(user, password)
    connection = MC(host="localhost", port=27017)  # 27017: default port
    return connection["JD_db"]
def __init__(self, host='localhost', port=27017, db='enterprise',
             collection='enterprise'):
    """Open a MongoDB client and keep handles on one database and collection.

    Args:
        host: Host passed to the MongoDB client.
        port: Port passed to the MongoDB client.
        db: Name of the database to expose as ``self.db``.
        collection: Name of the collection to expose as ``self.collection``.

    Raises:
        ConnectionError: If creating the client or either handle fails. The
            original exception is attached as the cause.
    """
    try:
        self.__conn = MC(host=host, port=port)
        # [] instead of getattr(): attribute-style access cannot reach names
        # starting with an underscore and resolves to a real attribute or
        # method when the name clashes with one.
        self.db = self.__conn[db]
        self.collection = self.db[collection]
    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise ConnectionError(str(e) + " -- Mongo connect error") from e
def del_database(cls, name):
    """Interactively drop the database ``name`` on the server at ``cls.URI``.

    Prints the existing database names, then asks for confirmation on stdin.

    Args:
        name: Name of the database to drop.

    Raises:
        Exception: With message ``"Cancel action"`` when the answer is
            anything other than ``"y"``.
    """
    client = MC(cls.URI)
    # database_names() is deprecated and no longer exists in PyMongo 4;
    # list_database_names() is its replacement.
    db_list = client.list_database_names()
    print("List of databases", db_list)

    as_chose = str(
        input(r"Write 'y' if you want delete database 'n' if you don't"))
    if as_chose != "y":
        raise Exception("Cancel action")

    client.drop_database(name)
    print("Done")
def ExtractTweets(user, conn, dbname):
    """Return the text of every English tweet stored for ``user``.

    Args:
        user: Name of the collection to read (``db[user]``).
        conn: Connection argument handed to the MongoDB client.
        dbname: Name of the database holding the collection.

    Returns:
        list: ``tweet['text']`` for each document matching
        ``{"lang": "en"}``.
    """
    # The context manager closes the client once the cursor has been fully
    # consumed, so repeated calls do not pile up open connections.
    with MC(conn) as client:
        collection = client[dbname][user]
        # Only English tweets are of interest, hence the language filter.
        return [tweet['text'] for tweet in collection.find({"lang": "en"})]
def get_db(db_name):
    """Look up a database on the local MongoDB server.

    Args:
        db_name (str): Name of the MongoDB database to open.

    Returns:
        pymongo.database.Database: Handle for ``db_name`` on a client created
        for ``localhost:27017``.
    """
    return MC('localhost:27017')[db_name]
def ExtractTweets(presidentialCandidates, conn, dbname):
    """Collect the English tweets of several candidates.

    Args:
        presidentialCandidates: Iterable of collection names, one per
            candidate.
        conn: Connection argument handed to the MongoDB client.
        dbname: Name of the database holding the collections.

    Returns:
        list: One ``[text, candidate]`` pair per document matching
        ``{"lang": "en"}``, in the order the candidates were given.
    """
    tweetDict = []  # a list of [text, candidate] lists, despite the name
    # The context manager closes the client after all cursors are consumed.
    with MC(conn) as client:
        db = client[dbname]
        for candidate in presidentialCandidates:
            # Only English tweets are of interest, hence the language filter.
            tweetDict.extend(
                [tweet['text'], candidate]
                for tweet in db[candidate].find({"lang": "en"}))
    return tweetDict
def ExtractTweets(users, conn, dbname):
    """Collect the English tweets of several users and report the time taken.

    Args:
        users: Iterable of collection names, one per user.
        conn: Connection argument handed to the MongoDB client.
        dbname: Name of the database holding the collections.

    Returns:
        list: One ``[text, user]`` pair per document matching
        ``{"lang": "en"}``.

    Prints the elapsed extraction time in minutes.
    """
    # Time the extraction itself. The original read a ``start`` that is never
    # defined in this function, so it either raised NameError or reported the
    # time since some unrelated module-level timestamp.
    start = time.time()

    td = []
    # The context manager closes the client after all cursors are consumed.
    with MC(conn) as client:
        db = client[dbname]
        for i in users:
            # Only English tweets are of interest, hence the language filter.
            td.extend(
                [tweet['text'], i] for tweet in db[i].find({"lang": "en"}))

    print("Extraction time:\n", (time.time() - start) / 60)
    return td
def inject_mongo(self, dataList):
    """Insert documents into MongoDB, skipping ones that already exist.

    Each item of ``dataList`` is used as a query against the ``testRun5``
    collection of the ``relationData`` database; it is inserted only when
    no matching document is found.

    :param dataList: iterable of documents (data list) to store
    :return: None
    """
    # Open the connection in a context manager so it is closed when done;
    # the original leaked one client per call.
    with MC() as DBclient:
        dataCollection = DBclient.relationData.testRun5
        for x in dataList:
            if dataCollection.find_one(x) is None:
                dataCollection.insert_one(x)  # insert the new document
    return None
class Clients:
    """Shared service handles, built once when the class body is executed.

    All settings are read from environment variables.
    """

    # --- AWS S3 -----------------------------------------------------------
    # Bucket names used for input and output.
    S3_BUCKET_NAME = os.getenv('AWS_S3_BUCKET')
    S3_OUTPUT_BUCKET = os.getenv('AWS_S3_OUTPUT_BUCKET')

    # S3 client authenticated with the access key pair from the environment.
    S3 = boto3.client(
        's3',
        aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
        aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'),
    )

    # --- MongoDB ----------------------------------------------------------
    # The database name that Back4App assigns is the part after the last
    # slash of the MongoDB URI, i.e. the URI's default database.
    mongo_client = MC(os.getenv('DATABASE_URI'))
    db_name = mongo_client.get_default_database().name
    MONGO = mongo_client[db_name]
class PastebinSpider(CrawlSpider):
    """Crawl the pastebin.com archive and store every matched page in MongoDB.

    Pages whose URL matches the rule below are passed to ``parse_items``,
    which writes one document per page into ``DeloitteDemo.pastebin``.
    """

    name = 'pastebin'
    allowed_domains = ['pastebin.com']
    start_urls = ['http://www.pastebin.com/archive']
    # Raw string: ``\d`` is a regex token, not a Python string escape.
    rules = [Rule(LinkExtractor(allow=[r'/[a-zA-Z]*\d*']), 'parse_items')]

    client = MC()
    db = client.DeloitteDemo
    collection = db.pastebin
    #collection.remove({'p':[]})

    def parse_items(self, response):
        """Extract url, paste text, time and visitor count and store them.

        Failures are reported on stdout and otherwise ignored so that one
        bad page does not stop the crawl.
        """
        try:
            items = PastebinItem()
            items['url'] = response.url
            items['paste'] = response.xpath(
                "//textarea[@id='paste_code']/text()").extract()
            items['time'] = response.xpath(
                "//div[@class='paste_box_line2']//span[1]/@title").extract()
            items['uniq_visitors'] = response.xpath(
                "//div[@class='paste_box_line2']//span[2]/text()").extract()
            entry = {
                'u': items['url'],
                'p': items['paste'],
                't': items['time'],
                'uv': items['uniq_visitors'],
            }
            # insert() is deprecated and removed in PyMongo 4.
            self.collection.insert_one(entry)
        except Exception:
            # Not a bare ``except:`` so KeyboardInterrupt/SystemExit still
            # propagate. print() with one argument behaves the same on
            # Python 2 and 3.
            print("Something went wrong")
# put triple into the mongodb from athena_App.openlaw.fact_triple_1 import * from pymongo import MongoClient as MC client = MC() db = client.spider_data collect = db.lawText writeCollect = db.lawTextTriple data = collect.find() check = writeCollect.find() extor = TripleExtractor() for item in data: set = 0 for each in check: if each['title'] == item['judgement']['title']: set = 1 check = writeCollect.find() break check = writeCollect.find() try: if set == 0: content = item["judgement"]["plaintext"]
def connect_client(*args):
    """Create a MongoDB client, forwarding all positional arguments unchanged.

    Returns:
        The newly constructed client.
    """
    client = MC(*args)
    return client
'''
68. Sorting

Among the artists tagged "dance", find the top 10 artists with the
largest number of rating votes.
'''
from pymongo import MongoClient as MC, DESCENDING

if __name__ == '__main__':
    artists = MC('localhost', 27017).artist_db.artist_collection
    dance_filter = {'tags.value': 'dance'}

    # Variant 1: sort and limit given as keyword arguments to find().
    by_kwargs = tuple(
        artists.find(dance_filter,
                     sort=[('rating.count', DESCENDING)],
                     limit=10))

    # Variant 2: the same query built by chaining cursor methods.
    cursor = artists.find(dance_filter)
    cursor = cursor.sort('rating.count', DESCENDING)
    by_chaining = tuple(cursor.limit(10))

    # Both spellings must yield the same documents in the same order.
    assert by_kwargs == by_chaining

    for artist in by_kwargs:
        print(f"{artist['rating']['count']:4d}| {artist['name']} ({artist['id']})")
'''
66. Getting the number of matches

Using the MongoDB interactive shell, find the number of artists whose
activity area is "Japan".
'''
from pymongo import MongoClient as MC

if __name__ == '__main__':
    collection = MC('mongodb://localhost:27017/').artist_db.artist_collection
    # Let the server count the matches instead of pulling every matching
    # document over the wire just to count them on the client.
    res = collection.count_documents({'area': "Japan"})
    print(res)

# Finding the same number with the MongoDB interactive shell:
#   - show dbs
#   - use artist_db
#   - show collections
#   - db.artist_collection.find({'area': 'Japan'}).count()
from unittest import TestCase
from pymongo import MongoClient as MC
import json
import requests
from logging_server import DEFAULT_PAGE_LENGTH
from logging_interface import Logger
from copy import copy
from datetime import datetime

# Direct handle on the ``logs`` collection of the ``logs`` database.
mongo_connection = MC('localhost', 27017)
db = mongo_connection.logs
mongo_logs = db.logs

logger = Logger('unittests', ssl=False, apikey='chunkybacon')

# Template for the log entries posted by populate_logs().
sample_log = {
    "origin": "unittest_sample_log",
    "timestamp": datetime.now().isoformat(),
    "log_level": "dev",
    "message": "this is a test of posting logs from JSON in the body!"
}


def populate_logs(n):
    """POST ``n`` copies of ``sample_log`` to the local logging server.

    Each posted entry carries an extra ``item`` field set to ``str(n)``.

    Args:
        n: Number of log entries to post.
    """
    for i in range(n):
        # Work on a copy: assigning sample_log directly aliased the shared
        # template, so adding 'item' leaked into every later use of it.
        log = copy(sample_log)
        # NOTE(review): this stores the total ``n`` on every entry, not the
        # loop index ``i`` -- confirm which one is intended.
        log['item'] = str(n)
        requests.post('http://localhost:5000/?key=chunkybacon',
                      data=json.dumps(log))
def login_db(cls):
    """Create a client for ``cls.URI``, store it on ``cls.CLIENT``, return it."""
    cls.CLIENT = MC(cls.URI)
    return cls.CLIENT
def __init__(self, database="coaching"):
    """Open a client for the configured host and port and select a database.

    Args:
        database: Name of the database to keep a private handle on.
    """
    mongo = MC(config.dbHost, config.dbPort)
    self.__db = mongo[database]
def connectDB(self):
    """Return the ``Donga`` database of the server named by the ``DBURI`` secret."""
    uri = get_secret("DBURI")
    client = MC(uri)
    return client["Donga"]
# import pandas as pd # import datetime as dt from flask import Flask, render_template, redirect #from flask_pymongo import PyMongo from pymongo import MongoClient as MC import pymongo import mars app = Flask(__name__) # Use flask_pymongo to set up mongo connection # app.config["MONGO_URI"] = "mongodb://*****:*****@app.route("/") def index(): #mars2 = mongo.db.mars2.find_one() # To view the content of table mars mars2 = col.find_one({}, sort=[("_id", pymongo.DESCENDING)]) print(mars2) return render_template("index.html", mars=mars2) @app.route("/scrape")
from pymongo import MongoClient as MC

# Connect to the local server and select universidad.alumnos.
cliente = MC('mongodb://localhost:27017')
bd = cliente.universidad
coleccion = bd.alumnos

# Print the 'nombre' field of every document in the collection.
alumnos = coleccion.find()
for alumno in alumnos:
    print(alumno['nombre'])
#build graph of law text from neo4j import GraphDatabase as GD from pymongo import MongoClient as MC #initialize the database driver driver=GD.driver("bolt://*****:*****@localhost:27017') db=client.spider_data collect=db.lawTextTriple def addTripleData(gr,node1,rela,node2,title): #cypher script gr.run("MERGE (a:Des {text: $node1,belong: $title})" "MERGE (b:Des {text: $node2,belong: $title})" "MERGE (a)-[:"+rela+"{belong: $title}]->(b)", node1=node1,node2=node2,title=title) with driver.session() as session: count=0 #read data in the mongodb dataSet=collect.find() for item in dataSet: #read triple list
def collection():
    """Yield the ``news`` collection of ``newsfilter-test``, then clean up.

    After the consumer is done, the collection is dropped and the client is
    closed.

    Yields:
        The ``news`` collection handle.
    """
    client = MC()
    news = client['newsfilter-test'].news
    try:
        yield news
    finally:
        # Run the cleanup even when the generator is closed early or an
        # exception is thrown into it; otherwise the test data and the
        # connection would be left behind.
        try:
            news.drop()
        finally:
            client.close()
def get_db(terms):
    """Pass the ``study`` collection of the ``test`` database to ``add_many``.

    Args:
        terms: Forwarded unchanged as the second argument of ``add_many``.
    """
    study_collection = MC()['test']['study']
    add_many(study_collection, terms)
from pymongo import MongoClient as MC from db_utils import DB_utils import pandas as pd import datetime if __name__ == '__main__': # DB Init host = "localhost" # ip port = 27017 # 默认端口 dbName = "JD_db" # 数据库名 # user = "******" #用户名 # password = *** #密码 MClient = MC(host=host, port=port) # 连接MongoDB MClient.drop_database("JD_db") # STEP 0: fake initialization to a time t0 "2018-03-13" now = "2018-03-13" rs_topItem = 10 sysInit = True files_folder, output_folder, PATH_CLICK, PATH_USER, PATH_SKU, PATH_ORDER = get_folder_setting( ) if sysInit: af_date = (pd.to_datetime(now) + datetime.timedelta(days=1)).strftime( "%Y-%m-%d") # both package works for time manipulation rec_from_start = load_combined_data(now=now, date_field='request_time', file_path=PATH_CLICK,
def __init__(self):
    """Start the remote webdriver session and open the MongoDB handles.

    Sets ``self.app`` (remote driver), ``self.db``, ``self.collection`` and
    ``self.wait`` (a ``WebDriverWait`` on the driver with ``TIMEOUT``).
    """
    driver = webdriver.Remote(SERVER, desired_capabilities=DESIRE_CAP)
    self.app = driver

    # NOTE(review): ``self.db`` holds what MC(...) returns and
    # ``self.collection`` is that object indexed by COLLECTION -- confirm the
    # attribute names match what these objects really are.
    mongo = MC(HOST, PORT)
    self.db = mongo
    self.collection = mongo[COLLECTION]

    self.wait = WebDriverWait(driver, TIMEOUT)