def mongo_connect():
    """Open a default MongoClient and select the phishtank collection.

    Returns:
        tuple: ``(client, collection)`` where ``collection`` is
        ``DeloitteDemo.phishtank`` on that client.
    """
    mongo = MC()
    phishtank = mongo.DeloitteDemo.phishtank
    return mongo, phishtank
def mongo_connect():
    """Open a default MongoClient and select the reddit collection.

    Returns:
        tuple: ``(client, collection)`` where ``collection`` is
        ``DeloitteDemo.reddit`` on that client.
    """
    mongo = MC()
    reddit = mongo.DeloitteDemo.reddit
    return mongo, reddit
Esempio n. 3
0
 def __init__(self,
              db='twitterDB',
              coll='twitter_data',
              host="localhost",
              port=27017):
     """Set up the ``IO_mongo`` logger and bind a MongoDB collection.

     Args:
         db: name of the database to use.
         coll: name of the collection inside ``db``.
         host: MongoDB server host.
         port: MongoDB server port.

     Any exception raised while creating the client or selecting the
     database/collection is logged and NOT re-raised; in that case
     ``self.client``, ``self.db`` and ``self.coll`` may be left unset.
     """
     self.dbName = db
     self.collName = coll
     appName = 'IO_mongo'
     self.logger = logging.getLogger(appName)
     logPath = os.getcwd() + '/log'
     fileName = appName
     # FileHandler opens the file immediately and does not create missing
     # directories, so make sure ./log exists first.
     if not os.path.isdir(logPath):
         os.makedirs(logPath)
     # getLogger() returns the same logger object for the same name, so
     # only attach handlers once; otherwise each new instance would add
     # another pair of handlers and every message would be duplicated.
     if not self.logger.handlers:
         fileHandler = logging.FileHandler('{}/{}.log'.format(
             logPath, fileName))
         formatter = logging.Formatter(
             '%(asctime)s - %(name)s -%(levelname)s - %(message)s')
         fileHandler.setFormatter(formatter)
         # The console handler deliberately keeps the default format,
         # as in the original code.
         streamHandler = logging.StreamHandler()
         self.logger.addHandler(streamHandler)
         self.logger.addHandler(fileHandler)
     self.logger.setLevel(logging.DEBUG)
     try:
         self.client = MC(host=host, port=port)
         self.db = self.client[db]
         self.coll = self.db[coll]
         self.logger.info(
             'Success connect to mongo database[{}]-coll[{}]'.format(
                 db, coll))
     except Exception as e:
         self.logger.error('Connect mongodb error : %s' % e)
Esempio n. 4
0
 def __init__(self,
              host='localhost',
              port=27017,
              db='enterprise',
              collection='enterprise'):
     """Connect to MongoDB and bind the requested database and collection.

     Args:
         host: MongoDB server host.
         port: MongoDB server port.
         db: attribute name of the database on the client.
         collection: attribute name of the collection on the database.
     """
     # Keep the private (name-mangled) attribute for the client itself.
     self.__conn = connection = MC(host=host, port=port)
     self.db = database = getattr(connection, db)
     self.collection = getattr(database, collection)
Esempio n. 5
0
    def __init__(self):
        """Open the ``spider_data.lawTextTriple`` collection and build the extractor."""
        self.client = mongo = MC()
        self.db = spider_db = mongo.spider_data
        self.collect = spider_db.lawTextTriple

        # Extractor instance kept on the object for later use.
        self.extor = TripleExtractor()
Esempio n. 6
0
 def db_connect(self):
     """Connect to the local MongoDB server and return the ``JD_db`` database.

     No user/password authentication is performed.
     """
     # Server address and the default MongoDB port.
     settings = {"host": "localhost", "port": 27017}
     mongo = MC(**settings)
     # Selecting by key is the equivalent of ``use JD_db`` in the shell.
     return mongo["JD_db"]
Esempio n. 7
0
 def __init__(self,
              host='localhost',
              port=27017,
              db='enterprise',
              collection='enterprise'):
     """Connect to MongoDB and bind the requested database and collection.

     Args:
         host: MongoDB server host.
         port: MongoDB server port.
         db: attribute name of the database on the client.
         collection: attribute name of the collection on the database.

     Raises:
         ConnectionError: if any step of the setup raises; the original
             error text is kept as the prefix of the message.
     """
     try:
         self.__conn = connection = MC(host=host, port=port)
         self.db = database = getattr(connection, db)
         self.collection = getattr(database, collection)
     except Exception as e:
         reason = str(e)
         raise ConnectionError(reason + " -- Mongo connect error")
Esempio n. 8
0
 def del_database(cls, name):
     """Interactively drop the database ``name`` on the server at ``cls.URI``.

     Prints the databases present on the server, asks for confirmation on
     stdin and drops the database only when the answer is ``y``
     (surrounding whitespace and letter case are ignored).

     Args:
         name: name of the database to drop.

     Raises:
         Exception: with message ``"Cancel action"`` when the user does
             not confirm.
     """
     client = MC(cls.URI)
     try:
         # database_names() is the legacy spelling and is gone in
         # PyMongo 4; prefer list_database_names() when the installed
         # client class provides it. The check is done on the class
         # because attribute access on a client *instance* yields a
         # Database object for unknown names.
         if hasattr(type(client), "list_database_names"):
             db_list = client.list_database_names()
         else:
             db_list = client.database_names()
         print("List of databases", db_list)
         as_chose = input(
             r"Write 'y' if you want delete database 'n' if you don't")
         if as_chose.strip().lower() != "y":
             raise Exception("Cancel action")
         client.drop_database(name)
         print("Done")
     finally:
         # Release the connection whether or not the drop happened.
         client.close()
Esempio n. 9
0
def ExtractTweets(user,conn,dbname):
    """Return the text of every English-language tweet stored for ``user``.

    Args:
        user: name of the collection holding that user's tweets.
        conn: connection argument passed straight to the Mongo client.
        dbname: name of the database containing the collection.

    Returns:
        list: the ``text`` field of each document whose ``lang`` is ``en``.
    """
    user_tweets = MC(conn)[dbname][user]
    texts = []
    # Only English tweets are of interest, hence the language filter.
    for tweet in user_tweets.find({"lang":"en"}):
        texts.append(tweet['text'])
    return texts
Esempio n. 10
0
def get_db(db_name):
    """Return the database called ``db_name`` from the local MongoDB server.

    Args:
        db_name (str): The name of the MongoDB database to use

    Returns:
        pymongo.database.Database: the database of that name on
        ``localhost:27017``
    """
    local_server = MC('localhost:27017')
    return local_server[db_name]
def ExtractTweets(presidentialCandidates, conn, dbname):
    """Collect the English tweets of every candidate, labelled by candidate.

    Args:
        presidentialCandidates: iterable of collection names, one per candidate.
        conn: connection argument passed straight to the Mongo client.
        dbname: name of the database containing the collections.

    Returns:
        list: ``[text, candidate]`` pairs, grouped in candidate order.
    """
    database = MC(conn)[dbname]
    labelled = []
    for name in presidentialCandidates:
        # Only English tweets are of interest, hence the language filter.
        for tweet in database[name].find({"lang": "en"}):
            labelled.append([tweet['text'], name])
    return labelled
Esempio n. 12
0
def ExtractTweets(users, conn, dbname):
    """Collect the English tweets of every user, labelled by user.

    Args:
        users: iterable of collection names, one per user.
        conn: connection argument passed straight to the Mongo client.
        dbname: name of the database containing the collections.

    Returns:
        list: ``[text, user]`` pairs, grouped in user order.
    """
    database = MC(conn)[dbname]
    pairs = []
    for name in users:
        # Only English tweets are of interest, hence the language filter.
        cursor = database[name].find({"lang": "en"})
        pairs += [[tweet['text'], name] for tweet in cursor]
    # NOTE(review): `start` is not defined in this function; presumably a
    # module-level timestamp set before the call - confirm.
    elapsed_minutes = (time.time() - start) / 60
    print("Extraction time:\n", elapsed_minutes)
    return pairs
Esempio n. 13
0
    def inject_mongo(self, dataList):
        """
        Insert into MongoDB every record that is not already stored.

        :param dataList: iterable of documents to store
        :return: None
        """
        # Default connection; target is relationData.testRun5.
        target = MC().relationData.testRun5

        for record in dataList:
            already_stored = target.find_one(record) is not None
            if already_stored:
                continue
            target.insert_one(record)

        return None
Esempio n. 14
0
class Clients:
    """Service clients created once, when this class body is executed.

    Every setting is read from an environment variable via ``os.getenv``.
    """

    #
    # Initialize AWS S3 Client
    #
    # Credentials come from AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY.
    S3 = boto3.client(
        's3',
        aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
        aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'),
    )
    # Bucket names; os.getenv yields None when a variable is not set.
    S3_BUCKET_NAME = os.getenv('AWS_S3_BUCKET')
    S3_OUTPUT_BUCKET = os.getenv('AWS_S3_OUTPUT_BUCKET')

    #
    # Initialize MongoDB Client & Connect to Database
    #
    # The name that Back4App gives its databases can be found
    # after the last slash in MongoDB URI string
    mongo_client = MC(os.getenv('DATABASE_URI'))
    # NOTE(review): presumably fails if DATABASE_URI names no database -
    # confirm against the PyMongo documentation.
    db_name = mongo_client.get_default_database().name
    # Database handle selected by the name taken from the URI.
    MONGO = mongo_client[db_name]
Esempio n. 15
0
class PastebinSpider(CrawlSpider):
    """Crawl links found from the pastebin.com archive and store each page.

    Each response handled by ``parse_items`` is written as one document
    to the ``DeloitteDemo.pastebin`` MongoDB collection.
    """

    name = 'pastebin'
    allowed_domains = ['pastebin.com']
    start_urls = ['http://www.pastebin.com/archive']
    # Raw string so that ``\d`` reaches the regex engine unchanged instead
    # of relying on an unrecognised string escape.
    rules = [Rule(LinkExtractor(allow=[r'/[a-zA-Z]*\d*']), 'parse_items')]
    # Connection shared by all instances; opened when the class is defined.
    client = MC()
    db = client.DeloitteDemo
    collection = db.pastebin

    def parse_items(self, response):
        """Extract url, paste text, time and visitor count; store them.

        Failures are reported on stdout and otherwise ignored, so one bad
        page does not stop the crawl.
        """
        try:
            items = PastebinItem()
            items['url'] = response.url
            items['paste'] = response.xpath("//textarea[@id='paste_code']/text()").extract()
            items['time'] = response.xpath("//div[@class='paste_box_line2']//span[1]/@title").extract()
            items['uniq_visitors'] = response.xpath("//div[@class='paste_box_line2']//span[2]/text()").extract()
            entry = {'u': items['url'], 'p': items['paste'], 't': items['time'], 'uv': items['uniq_visitors']}
            # NOTE(review): Collection.insert is the legacy PyMongo API
            # (removed in PyMongo 4); switch to insert_one once the
            # installed PyMongo version is confirmed.
            self.collection.insert(entry)
        except Exception:
            # ``except Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            print("Something went wrong")
Esempio n. 16
0
# put triple into the mongodb

from athena_App.openlaw.fact_triple_1 import *
from pymongo import MongoClient as MC

# Default MongoDB connection; everything below lives in spider_data.
client = MC()
db = client.spider_data

# lawText is iterated below as the input; lawTextTriple is queried below
# to find titles that are already present.
collect = db.lawText
writeCollect = db.lawTextTriple

# Cursor over every input document.
data = collect.find()

# Cursor over the existing triple documents (re-created inside the loop
# below, since a cursor can only be consumed once).
check = writeCollect.find()

extor = TripleExtractor()

for item in data:

    set = 0
    for each in check:
        if each['title'] == item['judgement']['title']:
            set = 1
            check = writeCollect.find()
            break

    check = writeCollect.find()

    try:
        if set == 0:
            content = item["judgement"]["plaintext"]
Esempio n. 17
0
def connect_client(*args):
    """Create and return a Mongo client, forwarding all positional arguments."""
    client = MC(*args)
    return client
Esempio n. 18
0
'''
68. ソート
"dance"というタグを付与されたアーティストの中でレーティングの投票数が多い
アーティスト・トップ10を求めよ.
'''
from pymongo import MongoClient as MC, DESCENDING

if __name__ == '__main__':
    artists = MC('localhost', 27017).artist_db.artist_collection

    # Variant 1: sort and limit passed as keyword arguments to find().
    by_votes = [('rating.count', DESCENDING)]
    top_keyword = tuple(
        artists.find({'tags.value': 'dance'}, sort=by_votes, limit=10))

    # Variant 2: sort and limit chained on the cursor.
    cursor = artists.find({'tags.value': 'dance'})
    cursor = cursor.sort('rating.count', DESCENDING).limit(10)
    top_chained = tuple(cursor)

    # Both spellings must give the same ten documents.
    assert top_keyword == top_chained

    for d in top_keyword:
        print(f"{d['rating']['count']:4d}| {d['name']} ({d['id']})")
Esempio n. 19
0
'''
66. 検索件数の取得
MongoDBのインタラクティブシェルを用いて,
活動場所が「Japan」となっているアーティスト数を求めよ.
'''
from pymongo import MongoClient as MC

if __name__ == '__main__':
    artists = MC('mongodb://localhost:27017/').artist_db.artist_collection
    # Count the matching documents by walking the cursor.
    japan_total = 0
    for _ in artists.find({'area': "Japan"}):
        japan_total += 1
    print(japan_total)
'''
* MongoDB のインタラクティブシェルを用いて,
  活動場所が「Japan」となっているアーティスト数を求める
    - show dbs
    - use artist_db
    - show collections
    - db.artist_collection.find({'area': 'Japan'}).count()
'''
Esempio n. 20
0
from unittest import TestCase
from pymongo import MongoClient as MC
import json
import requests
from logging_server import DEFAULT_PAGE_LENGTH
from logging_interface import Logger
from copy import copy
from datetime import datetime

# Direct handle on the ``logs.logs`` collection of the MongoDB server at
# localhost:27017.
mongo_connection = MC('localhost', 27017)
db = mongo_connection.logs
mongo_logs = db.logs

# Logging client used by the tests.
# NOTE(review): the API key is hard-coded - presumably a test-only key,
# confirm.
logger = Logger('unittests', ssl=False, apikey='chunkybacon')

# Template log entry; its timestamp is taken once, when this module is
# executed.
sample_log = {
    "origin": "unittest_sample_log",
    "timestamp": datetime.now().isoformat(),
    "log_level": "dev",
    "message": "this is a test of posting logs from JSON in the body!"
}


def populate_logs(n):
    """POST ``n`` copies of the sample log entry to the local logging server.

    Args:
        n (int): number of requests to send; also stored, as a string, in
            the ``item`` field of every entry.
    """
    # Work on a copy so the module-level ``sample_log`` template is not
    # mutated as a side effect of populating the server.
    log = copy(sample_log)
    # NOTE(review): every entry carries str(n), not the loop index -
    # confirm this is intended.
    log['item'] = str(n)
    # The body is identical for every request, so serialise it once.
    payload = json.dumps(log)
    for _ in range(n):
        requests.post('http://localhost:5000/?key=chunkybacon',
                      data=payload)

Esempio n. 21
0
 def login_db(cls):
     """Create a client for ``cls.URI``, store it on ``cls.CLIENT`` and return it."""
     cls.CLIENT = MC(cls.URI)
     return cls.CLIENT
Esempio n. 22
0
 def __init__(self, database="coaching"):
     """Connect to the server given by ``config`` and select ``database``."""
     host, port = config.dbHost, config.dbPort
     # Private (name-mangled) handle on the selected database.
     self.__db = MC(host, port)[database]
Esempio n. 23
0
 def connectDB(self):
     """Return the ``Donga`` database from the server named by the ``DBURI`` secret."""
     uri = get_secret("DBURI")
     client = MC(uri)
     return client["Donga"]
Esempio n. 24
0
# import pandas as pd
# import datetime as dt
from flask import Flask, render_template, redirect
#from flask_pymongo import PyMongo
from pymongo import MongoClient as MC
import pymongo
import mars

# Flask application object; the route decorators below register on it.
app = Flask(__name__)

# Use flask_pymongo to set up mongo connection
# app.config["MONGO_URI"] = "mongodb://*****:*****@app.route("/")
def index():
    """Render ``index.html`` with the document of ``col`` that has the highest ``_id``."""
    # Descending _id order puts the highest _id first.
    newest_first = [("_id", pymongo.DESCENDING)]
    mars2 = col.find_one({}, sort=newest_first)

    # Echo the fetched document to stdout before rendering.
    print(mars2)
    page = render_template("index.html", mars=mars2)
    return page


@app.route("/scrape")
Esempio n. 25
0
from pymongo import MongoClient as MC

# Connect to the local server, then pick the database and the collection
# through attribute access.
cliente = MC('mongodb://localhost:27017')
bd = cliente.universidad
coleccion = bd.alumnos

# Cursor over every document of the collection.
alumnos = coleccion.find()

# Print the name of each student.
for alumno in alumnos:
    nombre = alumno['nombre']
    print(nombre)
Esempio n. 26
0
#build graph of law text

from neo4j import GraphDatabase as GD
from pymongo import MongoClient as MC

#initialize the database driver
driver=GD.driver("bolt://*****:*****@localhost:27017')
db=client.spider_data
collect=db.lawTextTriple

def addTripleData(gr,node1,rela,node2,title):
    """Merge two ``Des`` nodes and the relationship between them.

    Args:
        gr: object exposing ``run(query, **params)`` (e.g. a session or
            transaction).
        node1: text of the source node.
        rela: relationship type linking ``node1`` to ``node2``.
        node2: text of the target node.
        title: value stored in the ``belong`` property of both nodes and
            of the relationship.

    Returns:
        None
    """
    # A relationship type cannot be sent as a query parameter, so it has
    # to be spliced into the query text. Backtick-quote it (doubling any
    # embedded backtick) so that types containing spaces or punctuation
    # are valid and cannot alter the structure of the query.
    rel_type = "`" + rela.replace("`", "``") + "`"

    # cypher script; clauses are separated by explicit spaces
    query = (
        "MERGE (a:Des {text: $node1,belong: $title}) "
        "MERGE (b:Des {text: $node2,belong: $title}) "
        "MERGE (a)-[:" + rel_type + " {belong: $title}]->(b)"
    )
    gr.run(query, node1=node1, node2=node2, title=title)

with driver.session() as session:

    count=0

    #read data in the mongodb
    dataSet=collect.find()

    for item in dataSet:

        #read triple list
Esempio n. 27
0
def collection():
    """Yield the ``newsfilter-test.news`` collection, then clean up.

    After the consumer resumes the generator, the collection is dropped
    and the client is closed.
    """
    mongo = MC()
    news = mongo['newsfilter-test'].news
    yield news
    # Teardown: remove the collection and release the connection.
    news.drop()
    mongo.close()
Esempio n. 28
0
def get_db(terms):
    """Pass ``terms`` to ``add_many`` together with the ``test.study`` collection.

    Returns None.
    """
    study_collection = MC()['test']['study']
    add_many(study_collection, terms)
from pymongo import MongoClient as MC
from db_utils import DB_utils

import pandas as pd
import datetime

if __name__ == '__main__':

    # DB Init
    host = "localhost"  # ip
    port = 27017  # 默认端口
    dbName = "JD_db"  # 数据库名
    # user = "******"         #用户名
    # password = ***      #密码
    MClient = MC(host=host, port=port)  # 连接MongoDB
    MClient.drop_database("JD_db")

    # STEP 0:  fake initialization to a time t0 "2018-03-13"
    now = "2018-03-13"
    rs_topItem = 10
    sysInit = True
    files_folder, output_folder, PATH_CLICK, PATH_USER, PATH_SKU, PATH_ORDER = get_folder_setting(
    )

    if sysInit:
        af_date = (pd.to_datetime(now) + datetime.timedelta(days=1)).strftime(
            "%Y-%m-%d")  # both package works for time manipulation
        rec_from_start = load_combined_data(now=now,
                                            date_field='request_time',
                                            file_path=PATH_CLICK,
Esempio n. 30
0
 def __init__(self):
     """Start the remote webdriver session and open the Mongo handles."""
     self.app = driver = webdriver.Remote(SERVER, desired_capabilities=DESIRE_CAP)
     # NOTE(review): ``self.db`` holds the client itself, so
     # ``self.collection`` is whatever indexing the client yields -
     # presumably a database rather than a collection; confirm.
     self.db = mongo = MC(HOST, PORT)
     self.collection = mongo[COLLECTION]
     # Explicit-wait helper bound to the same driver.
     self.wait = WebDriverWait(driver, TIMEOUT)