def getListQuery(lis,dbase,coll):
    """Look up the keywords in ``lis`` and return ranked results.

    Args:
        lis: list of keywords matched against the ``keyword`` field.
        dbase: backend name. ``"wiki"`` is delegated to ``wiki.main``;
            ``"testIndex"`` is read from the host 192.168.103.25; every
            other name is a database on the default local MongoDB.
        coll: name of the collection to query.

    Returns:
        * ``wiki.main(lis)`` for the wiki backend.
        * For ``"news"`` / ``"youtube"``: ``list(getNewContent(docs))`` when
          exactly one document was found per keyword, otherwise ``[]``.
        * Otherwise the keys of the merged documents, sorted by descending
          merged value.
    """
    from pymongo import MongoClient as mc

    # The wiki backend never touches MongoDB, so answer before connecting.
    if dbase == "wiki":
        return wiki.main(lis)

    # Build exactly one client instead of creating a default one and then
    # discarding it for the testIndex host.
    client = mc("192.168.103.25") if dbase == "testIndex" else mc()
    try:
        cursor = client[dbase][coll].find(
            {"keyword": {"$in": lis}}, {"keyword": 0, "_id": 0})
        # Materialise the cursor while the client is still open.
        listCounters = list(cursor)

        if dbase in ("news", "youtube"):
            # Only a complete match (one document per keyword) is usable.
            if len(lis) == len(listCounters):
                return list(getNewContent(listCounters))
            return []

        result = SumDict({})
        for listCounter in listCounters:
            result.merge(listCounter)
        return sorted(result, key=result.get, reverse=True)
    finally:
        client.close()
Beispiel #2
0
def getNews():
    client = mc(host)
    collection = client['mydb']['nba_news']
    lines = open('links.tmp', 'r').readlines()

    toggle, title, link = True, None, None
    for l in lines:
        if toggle:
            title = l.strip()
        else:
            link = l.strip()

            req = requests.get('{}/{}'.format(head, link))
            page = soup(req.text, 'html.parser')
            section = page.find('section')
            section = '<html><body>{}</body></html>'.format(str(section))
            article = soup(section, 'html.parser').find_all('p')

            content = ''.join([p.text for p in article])
            print title, link, content

            doc = {
                "title": title,
                "link": '{}/{}'.format(head, link),
                "content": content.replace("\"", "\'")
            }
            collection.insert_one(doc)

        toggle = not toggle

    print collection.count()
def conn_db(host, port, db_name):
    """Connect to MongoDB at ``host``:``port`` and return database ``db_name``.

    Only the database handle is returned; no collection is selected here.
    """
    client = mc(host, port)
    return client[db_name]
Beispiel #4
0
 def db_init(self):
     """Bind the ``review`` collections and ensure the unique ``href`` index.

     Sets ``self.db1`` to ``review.douban`` (with a unique ascending index on
     ``href``) and ``self.db2`` to ``review.all_book_id`` on the local MongoDB.
     """
     client = mc('127.0.0.1', 27017)
     review = client['review']
     self.db1 = review['douban']
     # create_index replaces ensure_index, which was removed in PyMongo 4.
     # The former dropDups option is ignored by MongoDB >= 3.0, so it is
     # no longer passed.
     self.db1.create_index([('href', pymongo.ASCENDING)], unique=True)
     self.db2 = review['all_book_id']
Beispiel #5
0
def gallery_spider(url,type):
    """Crawl an IKEA gallery page, storing room links and downloading images.

    Every ``roomblock`` div found at ``url`` is scanned for its first link
    and first image. Links are inserted into ``IKEA.GalleryUrl`` with
    ``iscrawled`` 0; images are saved to the working directory as
    ``<type>_<n>.jpg``. All existing ``GalleryUrl`` documents are reset to
    ``iscrawled`` 0 before new ones are added.

    Args:
        url: address of the gallery page to fetch.
        type: room category, stored with each link and used as the image
            file-name prefix (the name shadows the builtin but is part of
            the public signature).
    """
    import re
    import urllib.request
    from pymongo import MongoClient as mc

    ikea = 'http://www.ikea.com'
    # Compile each pattern once instead of once per room.
    block_pattern = re.compile('<div class="roomblock.*?">(.*?)</div>', re.S)
    url_pattern = re.compile('<a title=".*?" href="(.*?)">', re.S)
    img_pattern = re.compile('<img alt=".*?" title=".*?" src="(.*?)".*?>')

    with urllib.request.urlopen(url) as response:
        page = response.read().decode('utf8')
    rooms = block_pattern.findall(page)

    gallery_url = mc().IKEA.GalleryUrl
    gallery_url.update_many({}, {'$set': {'iscrawled': 0}})

    saved_images = 0
    for room in rooms:
        # Reset per room so the print below never reports a stale value or
        # fails on an unbound name when a block has no link or image.
        room_url = room_img = None
        url_hits = url_pattern.findall(room)
        img_hits = img_pattern.findall(room)
        if url_hits:
            room_url = ikea + url_hits[0]
            gallery_url.insert_one(
                {'type': type, 'url': room_url, 'iscrawled': 0})
        if img_hits:
            room_img = ikea + img_hits[0]
            filename = type + '_' + str(saved_images) + ".jpg"
            urllib.request.urlretrieve(room_img, filename)
            saved_images += 1
        print(room_url, room_img)
Beispiel #6
0
def getNews():
  client = mc(host)
  collection = client['mydb']['nba_news']
  lines = open('links.tmp', 'r').readlines()

  toggle, title, link = True, None, None
  for l in lines:
    if toggle:
      title = l.strip()
    else:
      link = l.strip()
      
      req = requests.get('{}/{}'.format(head, link))
      page = soup(req.text, 'html.parser')
      section = page.find('section')
      section = '<html><body>{}</body></html>'.format(str(section)) 
      article = soup(section, 'html.parser').find_all('p')

      content = ''.join([ p.text for p in article ])
      print title, link, content

      doc = {
          "title": title,
          "link": '{}/{}'.format(head, link),
          "content": content.replace("\"", "\'")
      }
      collection.insert_one(doc)

    toggle = not toggle

  print collection.count()
Beispiel #7
0
 def save_db(self):
     """Save the fiction record and its fetched chapters to the database.

     Connection settings come from ``self.fundb`` (``host``, ``port``,
     ``db``, ``coll``). The document whose ``name`` equals
     ``self.fiction['name']`` is upserted with the fiction metadata and the
     current Unix time in ``update``; every chapter with a truthy ``status``
     is then appended to its ``chapter`` array.
     """
     import time
     from pymongo import MongoClient as mc

     client = mc(host=self.fundb['host'], port=self.fundb['port'])
     collection = client.get_database(
         self.fundb['db']).get_collection(self.fundb['coll'])

     fiction = self.fiction
     selector = {'name': fiction['name']}

     # update_one replaces Collection.update, which was removed in PyMongo 4;
     # the original call relied on the single-document default.
     collection.update_one(selector, {
         '$set': {
             'name': fiction['name'],
             'author': fiction['author'],
             'fromsite': fiction['fromsite'],
             'url': fiction['url'],
             'num': fiction['num'],
             'update': int(time.time())
         }
     }, upsert=True)

     chapters = [
         {
             'title': item['title'],
             'url': item['url'],
             'status': item['status']
         }
         for item in fiction['chapter'] if item['status']
     ]
     # Append all chapters in one round trip; skip the call when there is
     # nothing to add, as the original loop did.
     if chapters:
         collection.update_one(
             selector, {'$push': {'chapter': {'$each': chapters}}})
 def __getClient():
     """Connect to the local MongoDB with SCRAM-SHA-1 auth and return ``db``.

     The names of all databases visible to the connection are printed
     before the ``"db"`` database handle is returned.
     """
     auth_options = {
         "username": "******",
         "password": "******",
         "authSource": "chatbot_database",
         "authMechanism": "SCRAM-SHA-1",
     }
     client = mc("mongodb://localhost:27017/", **auth_options)
     print(client.list_database_names())
     return client["db"]
Beispiel #9
0
 def save_data(self,db,table):
     """Save data to a MongoDB database (not implemented yet).

     Currently only imports PyMongo and creates a client with the default
     host and port; ``db`` and ``table`` are not used yet.

     Returns:
         ``False`` if importing PyMongo or creating the client fails;
         otherwise ``None`` (no success value is returned yet).
     """
     try:
         from pymongo import MongoClient as mc
         conn = mc()  # default host/port
         # TODO: add remaining database selection and data insertion.
     except Exception:
         # Narrower than a bare ``except:`` so KeyboardInterrupt and
         # SystemExit propagate; still best-effort for ordinary errors.
         return False
Beispiel #10
0
def replace_docs(db_name, col_name, docs):
    """Replace (or insert) each document in ``docs`` by its ``thread_num``.

    Connects with the URI returned by ``get_secret()`` and, for every
    document, replaces the stored document that has the same ``thread_num``,
    inserting it when no such document exists.
    """
    collection = mc(get_secret()).get_database(db_name)[col_name]
    for document in docs:
        collection.find_one_and_replace(
            {"thread_num": document['thread_num']}, document, upsert=True)
Beispiel #11
0
def get_db():
    """Return the ``agra_base`` database from the ``clusterkamal`` replica set.

    Prints ``Connected`` once the handle is created. This does not prove the
    server is reachable: no command is sent to it here.
    """
    # The original had the ``%(...)`` formatting inside the string literal,
    # so the placeholders were never substituted. Real credentials must be
    # percent-escaped before they are put in the URI.
    uri_template = (
        "mongodb://%s:%s@"
        "clusterkamal-shard-00-00-henbx.mongodb.net:27017,"
        "clusterkamal-shard-00-01-henbx.mongodb.net:27017,"
        "clusterkamal-shard-00-02-henbx.mongodb.net:27017"
        "/test?ssl=true&replicaSet=clusterkamal-shard-0&authSource=admin"
    )
    client = mc(uri_template % ('myname', 'mypassword'))
    db = client.agra_base
    # PyMongo 4 database objects raise on truth-value testing, so compare
    # with None explicitly.
    if db is not None:
        print("Connected")
    return db
Beispiel #12
0
 def __init__(self, host, port, db_name):
     """Connect to MongoDB and select ``db_name``, exiting on failure.

     Sends an ``ismaster`` command to verify the server is reachable. On any
     error a message is printed and the process exits with status 1.
     """
     try:
         self.client = mc(host, port)
         # Client creation alone is not treated as proof of connectivity;
         # issue a command to check the server.
         self.client.admin.command('ismaster')
         self.db = self.client[db_name]
     except Exception as e:
         # ``as`` syntax and a single-argument print work on both
         # Python 2.6+ and Python 3 and produce the same output.
         print(u'cannot connect database! ' + repr(e))
         # A non-zero status reports the failure to the calling process;
         # the original exited with 0.
         sys.exit(1)
Beispiel #13
0
 def connect(self):
     """Open the backend named by ``self.tipo`` and store it in ``self.con``.

     ``'mysql'`` connects with ``self.con_Data`` as keyword arguments and
     also records the driver's error class in ``self.error``. ``'mongo'``
     stores the collection named by the ``server``, ``port``, ``database``
     and ``collection`` entries of ``self.con_Data``. Any other value does
     nothing.
     """
     if self.tipo == 'mysql':
         import mysql.connector as mysql_driver
         self.con = mysql_driver.connect(**self.con_Data)
         self.error = mysql_driver.Error
         return
     if self.tipo == 'mongo':
         from pymongo import MongoClient as mc
         settings = self.con_Data
         mongo_client = mc(settings['server'], int(settings['port']))
         self.con = mongo_client[settings['database']][settings['collection']]
Beispiel #14
0
 def connect(self):
     """Open the backend named by ``self.tipo`` and store it in ``self.con``.

     ``'mysql'`` connects with ``self.con_Data`` as keyword arguments and
     also records the driver's error class in ``self.error``. ``'mongo'``
     stores the collection named by the ``server``, ``port``, ``database``
     and ``collection`` entries of ``self.con_Data``. Any other value does
     nothing.
     """
     if self.tipo == 'mysql':
         import mysql.connector as mysql_driver
         self.con = mysql_driver.connect(**self.con_Data)
         self.error = mysql_driver.Error
         return
     if self.tipo == 'mongo':
         from pymongo import MongoClient as mc
         settings = self.con_Data
         mongo_client = mc(settings['server'], int(settings['port']))
         self.con = mongo_client[settings['database']][settings['collection']]
Beispiel #15
0
 def login(self, server="localhost", port=27017, user="", pwd=""):
     """Create ``self.client`` from explicit arguments or from ``self.conf``.

     Passing an empty ``server`` loads ``server``, ``port``, ``user`` and
     ``pwd`` from ``self.conf`` instead. Credentials are put in the URI only
     when ``user`` is non-empty.
     """
     if server == "":
         conf = self.conf
         server, port, user, pwd = (
             conf["server"], int(conf["port"]), conf["user"], conf["pwd"])
     # NOTE(review): user/pwd are inserted verbatim; confirm callers
     # percent-escape special characters.
     credentials = '' if user == "" else '{}:{}@'.format(user, pwd)
     self.client = mc('mongodb://{}{}:{}'.format(credentials, server, port))
Beispiel #16
0
def insert_new_docs(db_name, col_name, docs):
    """Upsert every document in ``docs``, keyed by its ``thread_num``.

    Connects with the URI returned by ``get_secret()`` and sends one bulk
    write holding an ``UpdateOne`` ``$set`` upsert per document. Nothing is
    sent when ``docs`` is empty.
    """
    col = mc(get_secret()).get_database(db_name)[col_name]

    operations = [
        UpdateOne({"thread_num": doc['thread_num']}, {'$set': doc},
                  upsert=True)
        for doc in docs
    ]

    # bulk_write raises InvalidOperation when given no operations.
    if operations:
        col.bulk_write(operations)
Beispiel #17
0
def index():
    """Return an HTML greeting for one document of ``test.names``.

    Reads a single document from the ``names`` collection of the ``test``
    database on the local MongoDB and formats its ``name`` field.
    """
    names = mc('localhost', 27017).test.names
    # NOTE(review): find_one() returns None on an empty collection, which
    # would make the lookup below fail.
    first_entry = names.find_one()
    return '<b>Hello %s!</b>' % first_entry['name']
Beispiel #18
0
def write_to_mongo(docs, collection, dup=False):
    """Insert ``docs`` into the named collection of database ``db``.

    Args:
        docs: non-empty sequence of documents; each needs an ``md5`` key
            when ``dup`` is falsy.
        collection: name of the target collection.
        dup: when truthy, insert every document and print (rather than
            raise) duplicate-key errors; when falsy, insert a document only
            if no stored document has the same ``md5``.
    """
    assert docs and collection

    # One of the first three configured hosts is picked at random.
    client = mc(mongo_host[random.randint(0, 2)])
    target = client[db][collection]

    for doc in docs:
        if dup:
            try:
                target.insert_one(doc)
            except pymongo.errors.DuplicateKeyError as e:
                # ``as`` and a single-argument print behave identically on
                # Python 2.6+ and Python 3.
                print(e)
        elif target.find_one({'md5': doc['md5']}) is None:
            target.insert_one(doc)
def write_to_mongo(docs, collection, dup=False):
    """Insert ``docs`` into the named collection of database ``db``.

    Args:
        docs: non-empty sequence of documents; each needs an ``md5`` key
            when ``dup`` is falsy.
        collection: name of the target collection.
        dup: when truthy, insert every document and print (rather than
            raise) duplicate-key errors; when falsy, insert a document only
            if no stored document has the same ``md5``.
    """
    assert docs and collection

    # One of the first three configured hosts is picked at random.
    client = mc(mongo_host[random.randint(0, 2)])
    target = client[db][collection]

    for doc in docs:
        if dup:
            try:
                target.insert_one(doc)
            except pymongo.errors.DuplicateKeyError as e:
                # ``as`` and a single-argument print behave identically on
                # Python 2.6+ and Python 3.
                print(e)
        elif target.find_one({'md5': doc['md5']}) is None:
            target.insert_one(doc)
def main_action():
    """Fetch investment events for every ``ToBeCaptured`` page not yet done.

    For each page with ``done`` 0: sleep up to three seconds, fetch its
    events with ``investmentevent``, insert each as ``{'data': event}`` into
    ``InvestmentEvent``, mark the page ``done`` 1 and print how many pages
    remain.
    """
    database = mc().ITjuzi
    queue = database.ToBeCaptured
    pending = queue.find({'done': 0})
    # NOTE(review): Cursor.count() is removed in PyMongo 4.
    remaining = pending.count()
    investment_events = database.InvestmentEvent
    for entry in pending:
        # Random pause between requests.
        sleep(3 * random.random())
        page_id = entry['pageid']
        print('now fetching page ' + str(page_id))
        for record in investmentevent(page_id):
            print(record)
            investment_events.insert_one({'data': record})
        queue.update_one({'pageid': page_id}, {'$set': {'done': 1}})
        remaining -= 1
        print(str(remaining) + '...left')
Beispiel #21
0
def write_to_mongo(docs, collection, dup=False):
    """Insert ``docs`` into a collection, then verify the collection size.

    Args:
        docs: non-empty sequence of documents; each needs an ``md5`` key
            unless ``dup`` is ``True``.
        collection: name of the target collection in database ``db``.
        dup: when exactly ``True``, insert every document; otherwise insert
            a document only if no stored document has the same ``md5``.

    Raises:
        AssertionError: if the inputs are empty, or if the collection size
            after a 30 second wait differs from the number of processed
            documents.
    """
    assert docs and collection

    client = mc(host)
    database = client[db]
    # NOTE(review): the password passed here is ``username`` -- confirm
    # this is intended.
    database.authenticate(username, password=username)
    target = database[collection]

    count = 0

    for doc in docs:
        if dup is True:
            target.insert_one(doc)
        elif target.find_one({'md5': doc['md5']}) is None:
            target.insert_one(doc)

        # NOTE(review): this runs for skipped duplicates too, so ``count``
        # is the number of documents processed, not inserted.
        print('Inserted #%s...' % count)
        count += 1

    time.sleep(30)

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(target.count())
    assert target.count() == count
Beispiel #22
0
import urllib.request as request
import re
from pymongo import MongoClient as mc
import datetime
from time import sleep
from random import random as r
from eventlet.timeout import Timeout

# Crawl goratings.org player pages for every player not yet crawled.
client = mc()
db = client.GoRatings
# Cursor over the players whose isCrawled flag is 0.
players = db.TopRatings.find({'isCrawled': 0})
games = db.GameResults
ratings = db.TopRatings
# NOTE(review): Cursor.count() is removed in PyMongo 4.
num = players.count()
for player in players:
    # Random pause of up to four seconds between requests.
    sleep(4 * r())
    player_name = player['name']
    print(player_name)
    url = "http://www.goratings.org/players/" + str(
        player['playerid']) + ".html"
    original = request.urlopen(url)
    response = original.read().decode('utf8')

    print('there are something')
    # One table row of the player's page; the pattern spans several
    # physical lines and has eight capture groups.
    pattern = re.compile('''<tr><td>(.*?)</td><td>(.*?)</td>
<td>(.*?)</td>
<td>(.*?)</td>
<td><a href="(.*?)\.html">(.*?)</a></td>
<td>(.*?)</td>
<td><a href="http://www.go4go.net/go/games/sgfview/(.*?)">View game</a></td>
</tr>''')
Beispiel #23
0
def get_db_client():
    """Return the MongoDB client cached on ``g``, creating it on first use."""
    if 'dbcl' in g:
        return g.dbcl
    g.dbcl = mc('mongodb://10.131.65.27:27017/')
    return g.dbcl
##pithytimeout=0 
from commands import getoutput as go
print go("pip install sseclient")
from sseclient import SSEClient
import json
import time
from pymongo import MongoClient as mc

mip = "" ##Your MongoDB Address here

client = mc(mip,27017)
#print client
cores = {}

ac = "" #->Get you token for this from Particle Build
get_event = "BOYA_fridge" #this is the event we declared in "particle.publish"

while True:
    try:
        messages = SSEClient(
            'https://api.particle.io/v1/events/%s?access_token=%s' %(get_event,ac) 
            )
        #connecto the streaming https interface from particle, get the event for our token
        for msg in messages: #for each message that comes in
            try:
                foo = msg.data.replace("nan","NaN") #format NaN correctly for python
                total = json.loads(foo) #load the dataset as a JSON string
                data = json.loads(total['data']) #load the data payload as a JSON string
                data['time'] = time.time() #add a timestamp
                data['coreid'] = total['coreid'] #add the particle name
                try: #try to make an event index for the structure
Beispiel #25
0
                son[key] = to_binary(value)
            elif isinstance(value, dict):
                son[key] = self.transform_incoming(value, collection)
        return son

    def transform_outgoing(self, son, collection):
        """Decode custom binary values in ``son`` in place and return it.

        ``Binary`` values with subtype 128 are replaced by
        ``from_binary(value)``; nested dicts are processed recursively.
        """
        for field, item in son.items():
            if isinstance(item, Binary) and item.subtype == 128:
                son[field] = from_binary(item)
                continue
            if isinstance(item, dict):
                son[field] = self.transform_outgoing(item, collection)
        return son

"""
#client = mc()
client = mc('agm2.local', 27017)

mydb = client.tutorial
up = mydb.topics
"""
# following is for insertion of new records
topics = {"author": "Duke",
           "title" : "PyMongo 101",
           "tags" : ["MongoDB", "PyMongo", "Tutorial"],
           "date" : datetime.utcnow()
          }

up_id = up.insert(topics)

new_posts = [{"author": "Mike",
               "text": "Another post!",
Beispiel #26
0
# -*- coding: utf-8 -*-
"""
Created on 2017/10/28 下午12:57
@author: SimbaZhang
"""

import requests
import time
import re
from utils.log import logger
from pymongo import MongoClient as mc
from bs4 import BeautifulSoup as bs
# Module-level handle to the ``all_book_id`` collection of the ``review``
# database on the local MongoDB (despite its name, ``db`` is a collection).
client = mc('127.0.0.1', 27017)
db = client['review']['all_book_id']


class BookId(object):
    def __init__(self):
        self.start_url = 'https://book.douban.com/tag/?view=type&icn=index-sorttags-hot'
        self.header1 = {
            'User-Agent':
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
        }
        self.header2 = {
            'Host':
            'book.douban.com',
            'User-Agent':
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
        }

    def get_book_type(self):
Beispiel #27
0
from pymongo import MongoClient as mc
import re
import urllib.request as fetch
from time import sleep
import random

# Scan IKEA gallery pages (iscrawled == 2) for product ids.
client=mc()
db=client.IKEA
gallery_gadget=db.GalleryGadget
gallery_url=db.GalleryUrl
# Cursor over the gallery URLs whose iscrawled flag is 2.
tobe=gallery_url.find({'iscrawled':2})
#gallery_url.update_many({},{'$set':{'iscrawled':0}})
# NOTE(review): Cursor.count() is removed in PyMongo 4.
numbers=tobe.count()
for each in tobe:
    print(numbers)
    # Random pause of up to two seconds between requests.
    sleep(random.random()*2)
    # Distinct product ids found on this page, in order of appearance.
    gadgat=[]
    url=each['url']
    response=fetch.urlopen(url)
    response=response.read().decode('utf8')
    print(response)
    #pattern_gadget=re.compile('<a title=".*?" href="/cn/zh/catalog/products/(.*?)/">',re.S)
    pattern_gadget=re.compile('href="/cn/zh/catalog/products/(.*?)/',re.S)
    result=re.findall(pattern_gadget,response)
    print(result)
    if(result):
        # NOTE(review): this inner loop rebinds ``each``, shadowing the
        # outer loop's document for the rest of the iteration.
        for each in result:
            if each not in gadgat:
                gadgat.append(each)
                print(each)
                print(url)
Beispiel #28
0
 def __init__(self):
     """Create a default MongoDB client and bind the ``scantool`` database."""
     mongo_client = mc()
     self.client = mongo_client
     self.db = mongo_client.scantool
Beispiel #29
0
from flask import render_template, flash
from flask import request, redirect, url_for
from flask_security import login_required
from flask_login import logout_user
from extension import (render_with_opt, get_category, app, SearchForm)
from path_finder import playFinder
from station import get_station
from dbconfig import *
from pymongo import MongoClient as mc

# Module-level data sources, created at import time.
bikeCol = get_station()
# NOTE(review): the MongoDB host is hard-coded here.
client = mc('mongodb://150.95.204.252:27017')
db = client.get_database('map_db')
# ``place`` collection of the ``map_db`` database.
collection = db.get_collection('place')


@app.route('/', methods=['GET', 'POST'])
def main():
    """Redirect requests for the site root to ``/index/``."""
    target = "/index/"
    return redirect(target)


@app.route('/index/', methods=['GET', 'POST'])
def index():
    locsearch = SearchForm()

    if request.method == "POST":
        try:
            src = request.form["src"]
            dest = request.form["dest"]
            print(src, dest)
            bike = playFinder(src.replace('대한민국 ', ''),
Beispiel #30
0
Created on 31/10/2014

@author: aurelio
'''
import time as tm
import re, sys
import subprocess
from pymongo import MongoClient as mc
from operator import itemgetter


# Start time of the script.
inicio = tm.time()
#port numbers for freeling in server
port1 = 50005
port2 = 50006   
client = mc('localhost', 27017)
mydb = client.wiki
col = mydb.pages

# Page to look up, by exact title.
topic = {"title" : 'Instituto de Crédito Oficial'}

f = col.find_one(topic)
# NOTE(review): assumes every page document has a 'redirect' field -- confirm.
if f and len(f['redirect']) > 10:
    # Follow the redirect: strip the '#REDIRECT' marker and look up the
    # target title instead.
    redireccion = re.sub('#REDIRECT', '',f['redirect']).strip()
    topic = {"title" : redireccion}
    f = col.find_one(topic)
    if not f:
        print("no encontrado")
        sys.exit()
# Page missing, or its 'texto' field is shorter than 80 characters.
elif not f or len(f['texto']) < 80:
    print("no encontrado")
Beispiel #31
0
 def __init__(self):
     """Create a default MongoDB client and bind the ``nodeDB`` database."""
     mongo_client = mc()
     self.client = mongo_client
     self.db = mongo_client.nodeDB
Beispiel #32
0
                son[key] = to_binary(value)
            elif isinstance(value, dict):
                son[key] = self.transform_incoming(value, collection)
        return son

    def transform_outgoing(self, son, collection):
        """Decode custom binary values in ``son`` in place and return it.

        ``Binary`` values with subtype 128 are replaced by
        ``from_binary(value)``; nested dicts are processed recursively.
        """
        for field, item in son.items():
            if isinstance(item, Binary) and item.subtype == 128:
                son[field] = from_binary(item)
                continue
            if isinstance(item, dict):
                son[field] = self.transform_outgoing(item, collection)
        return son

"""
#client = mc()
client = mc('agm2.local', 27017)

mydb = client.tutorial
up = mydb.topics
"""
# following is for insertion of new records
topics = {"author": "Duke",
           "title" : "PyMongo 101",
           "tags" : ["MongoDB", "PyMongo", "Tutorial"],
           "date" : datetime.utcnow()
          }

up_id = up.insert(topics)

new_posts = [{"author": "Mike",
               "text": "Another post!",
Beispiel #33
0
from django.shortcuts import render
from django.http import HttpResponse
from datetime import datetime as T

from pymongo import MongoClient as mc
# Module-level MongoDB handles shared by the views below.
# NOTE(review): the URI names database ``myProject`` but the code uses
# ``nayan`` -- confirm which is intended.
client = mc("mongodb://localhost:27017/myProject")
db=client.nayan
entries=db.entries
hosts=db.hosts

import smtplib
# Mail account credentials (masked in this copy of the source).
gmail_user = '******' 
gmail_password = '******'  


def checkout(request):
    if request.method== 'POST':
        visitor=entries.find_one({'visitorName':request.POST.get('visitorName'),'checkoutTime':None})
        if visitor==None:
            return render(request ,'home/msg.html',{'msg':"Not Found"})
        checkoutTime=T.now()
        entries.update_one(visitor,
            {"$set": {'checkoutTime':checkoutTime}}
             )
        to = visitor["visitorEmail"]
        subject = 'OMG Super Important Message'
        body1= f'hostName:{visitor["hostName"]}\nhostEmail:{visitor["hostEmail"]}\nhostPhoneNo:{visitor["hostPhoneNo"]}\nhostAdress:{visitor["hostAdress"]}\n'
        
        body2=f'visitorName:{visitor["visitorName"]}\nvisitorEmail:{to}\nvisitorPhoneNo:{visitor["visitorPhoneNo"]}\nvisitorAdress:{visitor["visitorAdress"]}\n'
        
        body3=f'checkinTime:{visitor["checkinTime"]}\ncheckoutTime:{checkoutTime}\n'
Beispiel #34
0
from pymongo import MongoClient as mc
from datetime import datetime as dt
import pandas as pd
from pithy import *
from datetime import datetime as dt

#Log into database
client = mc("radical-edward.princeton.edu", 47017)

#Find All Distinct Core DS
dist = client.mae221.lab5_1.distinct('coreid')

print "These Photons have checked in with Big Brother:"

count = 1
for d in dist:
    print "%i) %s" % (count, d)
    count += 1

print
#You can make, instead, a list of your coreids here
# dist = []
# dist.append('27001b000247343337373739')
# dist.append('2f002a000247343339373536')

#For each Photon

#hours to check
hr = 2

for core in dist:
Beispiel #35
0
from config import *
import jieba as jb

# Encoding used for the user dictionary and for all matching below.
encode = 'utf-8'

acam = ac.Automaton()
# NOTE(review): this file handle is never closed in the visible code.
user_dict = cc.open(user_dict_path, 'r', encode)

# One stripped, encoded entry per dictionary line.
special_words = [word.strip().encode(encode) for word in user_dict.readlines()]

# Register each word with its list index as the stored value.
for idx, word in enumerate(special_words):
    acam.add_word(word, idx)

acam.make_automaton()

mongo = mc()
db = mongo.Message
msg_overdue = db.msg_overdue

special_words_set = set(special_words)
# Starts as a copy of the special words.
all_words = special_words_set.copy()


def parse_group(documents, fout=None):
    print fout
    global all_words
    for doc in documents:
        str_text = doc['text']
        words = [
            special_words[item[1]]
            for item in acam.iter(str_text.encode(encode))
Beispiel #36
0
import time
from numpy import *
import json
from commands import getoutput as go


#Flask Imports
from flask import Flask,request,Response,send_file
from functools import wraps

# Load connection settings from settings.json.
# NOTE(review): the file handle opened here is never closed explicitly.
sets = json.load(open("settings.json"))
mip  = sets['mongo_ip']
port = sets['port']


# NOTE(review): ``mc`` is not imported in the visible lines; presumably
# pymongo.MongoClient -- confirm.
cli = mc(mip)

#Instantiate Server
app = Flask(__name__)

#This pulls the node data
@app.route("/data/",methods=['POST'])
def out(var=None):
    form = json.loads(request.get_data())
    out = {}
    try:
        db  = form['db']
        col = form['col']
        q = form['q']
        #data['time'] = time.time()
        df = pd.DataFrame(list(cli[db][col].find(q)))
def getListQuery(lis,dbase,coll):
    """Look up the keywords in ``lis`` and return ranked results.

    Args:
        lis: list of query keywords.
        dbase: backend name. ``"wiki"`` uses the MySQL ``wordmatrix`` table;
            ``"testIndex"`` is read from the MongoDB host 192.168.103.25;
            every other name is a database on the default local MongoDB.
        coll: MongoDB collection name (unused for ``"wiki"``).

    Returns:
        * ``"wiki"``: the module-level ``wikiAns`` list after the waiter
          finishes.
        * ``"news"``: ``list(getNewContent(docs))`` when exactly one
          document was found per keyword, otherwise ``[]``.
        * Otherwise the keys of the merged documents, sorted by descending
          merged value.

    The wiki branch passes its answer through the module-level globals
    ``wikiAns`` / ``wikiAnsGot`` (it also sets ``check`` and ``sor``).
    NOTE(review): if ``Process`` is ``multiprocessing.Process``, assignments
    made in the workers are not visible in this process -- confirm what
    ``Process`` is. Nothing in this function resets ``wikiAnsGot``, and the
    waiter never finishes if the lookup fails before publishing an answer.
    """
    import logging
    import time
    from pymongo import MongoClient as mc

    if dbase == "wiki":
        def wikiMain():
            # Query MySQL, rank the candidate titles and publish the answer
            # through the module-level globals.
            global wikiAns, wikiAnsGot, check, sor
            db = MySQLdb.connect("localhost", "root", "", "wiki")
            cursor = db.cursor()
            try:
                # Let the driver escape the user's keywords instead of
                # interpolating them into the SQL text.
                placeholders = ",".join(["%s"] * len(lis))
                sql = ("SELECT json_data from wordmatrix where word IN (%s)"
                       % placeholders)
                cursor.execute(sql, list(lis))
                db.commit()
                res = cursor.fetchall()

                listCounters = []
                ke = set()  # keys common to every row seen so far
                first = True
                for r in res:
                    d = json.loads(r[0].replace("#dot#", "."))
                    if first:
                        ke = set(d.keys())
                        first = False
                    else:
                        ke = ke.intersection(d.keys())
                    listCounters.append(d)
                    if not ke:
                        # No title is shared by all keywords seen so far.
                        wikiAns = []
                        wikiAnsGot = True

                # Restrict every row to the common keys before summing.
                listCounter = [dict((k, counter[k]) for k in ke)
                               for counter in listCounters]

                check = 1  # module-level flag, kept for other readers
                result = SumDict1({})
                for li in listCounter:
                    result.merge(li)
                sor = sorted(result, key=result.get, reverse=True)

                # Prefer an exact match with the query: either a redirect
                # entry (marked "#r#") or a title equal to the joined words.
                wanted = "_".join(lis)
                for ind, term in enumerate(sor):
                    t = term.lower()
                    if "#r#" in t and t.replace("#r#", "") == wanted:
                        resolved = resolveRedirects(term.replace("#r#", ""))
                        sor[ind], sor[0] = sor[0], resolved
                        wikiAns = [sor[0]]
                        wikiAnsGot = True
                        break
                    elif t == wanted:
                        sor[ind], sor[0] = sor[0], sor[ind]
                        b = checkForDis(sor)
                        wikiAns = [b]
                        wikiAnsGot = True
                        break

                # As in the original, this runs even after an exact match
                # above and may overwrite that answer.
                if result[sor[0]] == 1.0:
                    # The top candidate's merged value is exactly 1.0.
                    if "#r#" in sor[0]:
                        resolved = resolveRedirects(sor[0].replace("#r#", ""))
                        sor[0] = resolved
                        wikiAns = [sor[0]]
                    else:
                        wikiAns = [checkForDis(sor)]
                    wikiAnsGot = True
                else:
                    rnks = compareRanks(sor)
                    if len(rnks) != 0:
                        if "#r#" in rnks[0]:
                            wikiAns = [resolveRedirects(
                                rnks[0].replace("#r#", ""))]
                        else:
                            wikiAns = [rnks[0]]
                    elif "#r#" in sor[0]:
                        wikiAns = [resolveRedirects(
                            sor[0].replace("#r#", ""))]
                    else:
                        # Worst case: neither an exact match nor ranked.
                        wikiAns = [checkForDis(sor)]
                    wikiAnsGot = True
            except Exception:
                # Still best-effort, but no longer silent.
                logging.getLogger(__name__).exception(
                    "wiki lookup failed for %r", lis)
                db.rollback()
            finally:
                db.close()

        def waitForAns():
            global wikiAnsGot
            # Poll with a short sleep instead of spinning at full CPU.
            while not wikiAnsGot:
                time.sleep(0.001)

        q = "".join(lis)
        th1 = Process(target=checkInCache, args=(q,))
        th1.start()
        th2 = Process(target=wikiMain, args=())
        th2.start()
        tt = Process(target=waitForAns, args=())
        tt.start()
        tt.join()
        return wikiAns

    # Below: shared by the news and general MongoDB backends.
    # Build exactly one client instead of creating a default one and then
    # discarding it for the testIndex host.
    client = mc("192.168.103.25") if dbase == "testIndex" else mc()
    try:
        cursor = client[dbase][coll].find(
            {"keyword": {"$in": lis}}, {"keyword": 0, "_id": 0})
        # Materialise the cursor while the client is still open.
        listCounters = list(cursor)

        if dbase == "news":
            # Only a complete match (one document per keyword) is usable.
            if len(lis) == len(listCounters):
                return list(getNewContent(listCounters))
            return []

        result = SumDict({})
        for listCounter in listCounters:
            result.merge(listCounter)
        return sorted(result, key=result.get, reverse=True)
    finally:
        client.close()
Beispiel #38
0
def xlsx(workbook_path="Brand.xlsx", first_row=2, stop_row=320798):
    """Import the medicine spreadsheet into the ``Medicine`` database.

    Each row from ``first_row`` up to (not including) ``stop_row`` is
    compared with the row above it, and a document is inserted whenever the
    relevant columns changed:

    * column 1 changed -> ``THCs``
    * column 2 changed -> ``SubTHCs``
    * column 7 changed -> ``manufacturer``
    * columns 6 and 3 both changed -> ``MedBrands``
    * columns 4, 5, 9 and 8 all changed -> ``medicine``

    Args:
        workbook_path: workbook to read; its active sheet is used.
        first_row: first row to process. Must be at least 2, because the
            row above it is read for comparison.
        stop_row: row number at which processing stops (exclusive).

    The defaults reproduce the previously hard-coded file and row range.
    """
    sheet = xl.load_workbook(workbook_path).active

    medicine_db = mc('localhost', 27017).Medicine
    thcs = medicine_db.THCs
    sub_thcs = medicine_db.SubTHCs
    manufacturers = medicine_db.manufacturer
    brands = medicine_db.MedBrands
    medicines = medicine_db.medicine

    def read_row(row):
        # Map spreadsheet column number (1-9) to that cell's value in *row*.
        return dict((col, sheet.cell(row=row, column=col).value)
                    for col in range(1, 10))

    def stamped(fields):
        # Add the two audit timestamps that every inserted document carries.
        now = dt.datetime.utcnow()
        fields["created"] = now
        fields["LastModified"] = now
        return fields

    previous = read_row(first_row - 1)
    for row in range(first_row, stop_row):
        current = read_row(row)

        def changed(*columns):
            # True only when every listed column differs from the row above.
            return all(current[col] != previous[col] for col in columns)

        if changed(1):
            thcs.insert_one(stamped({
                "Name": current[1],
                "Description": " ",
            }))

        if changed(2):
            sub_thcs.insert_one(stamped({
                "Thcs": current[1],
                "SubThcs": current[2],
                "Description": " ",
            }))

        if changed(7):
            manufacturers.insert_one(stamped({
                "ManufacturerName": current[7],
                "Address": " ",
            }))

        if changed(6, 3):
            brands.insert_one(stamped({
                "ManufactName": current[7],
                "Composition": current[6],
                "MedicineBrand": current[3],
                "Description": " ",
            }))

        # NOTE(review): requiring all four columns to change at once looks
        # strict; confirm that "any changed" was not intended.
        if changed(4, 5, 9, 8):
            medicines.insert_one(stamped({
                "SubThc": current[2],
                "medForm": current[5],
                "MedicinBrand": current[3],
                "HsnCode": current[4],
                "MaxRP": current[9],
                "PrimaryPack": current[8],
                "SecondaryPack": " ",
                "TertiaryPack": " ",
            }))

        previous = current
def getListQuery(lis1,dbase,coll):
    """Return ranked results for the keywords in ``lis1`` from the backend chosen by ``dbase``.

    ``dbase`` selects the code path:

    * ``"wiki"`` -- defines ``wikiMain`` (a MySQL lookup against the
      ``wordmatrix`` table) and ``waitForAns``, starts them together with
      ``checkInCache`` as three ``Process`` objects, joins the waiter and
      returns the module-level ``wikiAns``. ``coll`` is not used here.
    * ``"news"`` -- queries MongoDB collection ``coll`` and returns
      ``list(getNewContent(...))`` when the number of documents found
      equals ``len(lis)``; otherwise returns ``[]``.
    * anything else -- queries MongoDB collection ``coll``, merges the
      returned documents into a ``SumDict`` and returns its keys sorted by
      value, highest first.

    A default ``MongoClient`` is created up front; for ``dbase == "newIndex"``
    it is replaced by a client for host 192.168.103.59.

    NOTE(review): the MongoDB paths read ``lis``, not the ``lis1`` parameter.
    ``lis`` is never assigned in this function's own scope, so it resolves to
    a module-level global -- confirm that is intended.
    """
    from pymongo import MongoClient as mc
    from collections import Counter
    check = 0
    client = mc()
    if dbase == "newIndex":
        client = mc("192.168.103.59")
    if dbase=="wiki":
    	def wikiMain():
    		# Publishes its answer through module globals: wikiAns holds the result list,
    		# wikiAnsGot is set to True once wikiAns has been written.
    		global wikiAns,wikiAnsGot,check,lis
    		lis=lis1
	    	db = MySQLdb.connect("localhost","root","#srmseONserver1","wiki" )
	    	cursor = db.cursor()
	    	#print lis
	    	# The length is checked because a 1-tuple renders as (key,) and the trailing
	    	# comma breaks the MySQL IN clause, so a single keyword uses '=' instead.
	    	# NOTE(review): both statements are built by string interpolation from lis;
	    	# consider a parameterized cursor.execute(sql, params) instead.
	    	# When lis is empty neither branch assigns sql; the resulting error on
	    	# cursor.execute(sql) is caught by the except below, which yields wikiAns=[].
	    	if len(lis)>1:
	    		sql = """SELECT `word`,`json_data` from wordmatrix where word IN %s"""%(str(tuple(lis)))
	    	elif len(lis)==1:
	    		sql = """SELECT `word`,`json_data` from wordmatrix where word ='%s'"""%(str(lis[0]))
		try:
		   cursor.execute(sql)
		   #print sql
		   db.commit()
		   res = cursor.fetchall()
		   listCounters=[]#initially holds all the the data of keys
		   listCounter=[]#holds the dics obtained from common keys
		   ke=set([])#stores common keys
		   first=True
		   loadedCache={}#stores loaded data from json.loads
		   # Decode each row's JSON (restoring "." from the "#dot#" placeholder) and
		   # keep in ke only the keys that appear in every row's dict.
		   for r in res:
		   	d=json.loads(r[1].replace("#dot#","."))
		   	loadedCache[r[0]]=d
		   	if first:
		   		ke=set(d.keys())
		   		first=False
		   	else:
		   		ke=set(d.keys()).intersection(set(ke))
		   	listCounters.append(d)
		   if len(list(ke))==0:
		   	# No common keys: retry using only the words left after removing stop words.
		   	words=removeStopWords(" ".join(lis))
		   	q=" ".join(words)
			global lis
			lis=words
			th2=Process(target=checkInCache,args=(q,))
			th2.start()
		   	listCounters=[]
		   	listCounter=[]
		   	ke=set([])#stores common keys
		   	first=True
		   	#Storing common keys in ke
		   	for r in res:
		   		if r[0] in words:
			   		d=loadedCache[r[0]]
			   		if first:
			   			ke=set(d.keys())
			   			first=False
			   		else:
			   			ke=set(d.keys()).intersection(set(ke))
		   			listCounters.append(d)
		   	if len(list(ke))==0:
		   		wikiAns=[]#no results got
		   		wikiAnsGot=True
		   # Reduce every dict to the common keys only.
		   for l in listCounters:
		   	d={}#creating dict from common keys
		   	for k in list(ke):
		   		d[k]=l[k]
		   	listCounter.append(d)
		   check =1#no idea why prashant used this
		   result=SumDict1({})#Sum of dicts in listCounters will be stored here
		   if check ==1:
			for li in listCounter:
				result.merge(li)
		    	global sor
		   	# Candidate titles ordered by their merged value, highest first.
		   	sor=sorted(result, key=result.get, reverse=True)
		   	#if we already have 1 for 1 keyword and title is 2 keyword
		   	#ex sachin tendulkar we got Sachin:1 but not right results
		   	#right on is Sachin:0s.5 Tendulkar:0.5
		   	#below for loop for doing exact match
		   	#print sor
		   	for ind,term in enumerate(sor):
		   		#remove punctuataion ex ajay k. sood vs ajay k sood
		   		t=term.lower()#remove_punctuations(term)# this does not remove hash tags
		   		redirect=False
		   		if "#r#" in t and t.replace("#r#","")=="_".join(lis):
		   			#This condition is for checking if user query matches to a redirect page
	       		   		resolved=resolveRedirects(term.replace("#r#",""))
			   		sor[ind],sor[0]=sor[0],resolved
					wikiAns=[sor[0]]  
					wikiAnsGot=True  
					return				
		   		elif t.lower()=="_".join(lis):
					#Not a redirect but a exact match
			   		sor[ind],sor[0]=sor[0],sor[ind]
			   		b=checkForDis(sor)
					wikiAns=[b]
					wikiAnsGot=True
					return
			#did'nt got exact match just return 1
			#Exact match with user query not found but found something which adds upto 1
		   	if result[sor[0]]==1.0:
		   		if "#r#" in sor[0]:
		   			resolved=resolveRedirects(sor[0].replace("#r#",""))
			   		sor[0]=resolved
		   			wikiAns=[sor[0]]
		   			wikiAnsGot=True
					return
		   		else:
		   			b=checkForDis(sor)
		   			wikiAns=[b]
		   			wikiAnsGot=True
		   			return
			else:
		   		# Top candidate does not sum to 1.0: prefer the first entry of
		   		# compareRanks(sor) when it returns anything, else fall back to sor[0].
		   		rnks=compareRanks(sor)
		   		if not len(rnks)==0:
		   			if "#r#" in rnks[0]:
		   				wikiAns=[resolveRedirects(rnks[0].replace("#r#",""))]
		   				wikiAnsGot=True
		   				return
					else:
		   				wikiAns=[rnks[0]]
		   				wikiAnsGot=True
						return
		   		else:
		   			if "#r#" in sor[0]:
		   				wikiAns=[resolveRedirects(sor[0].replace("#r#",""))]
		   				wikiAnsGot=True
						return
		   			else:
		   				b=checkForDis(sor)
		   				wikiAns=[b]#worst result not exact match not ranked
		   				wikiAnsGot=True
						return
		   else:
		   	wikiAns=[]
		   	wikiAnsGot=True
			return
		except Exception as e:
		   # Any failure above (including an empty sor list) ends up here and
		   # is reported as "no answer".
		   typee, value, traceback = sys.exc_info()
    		   #print typee
		   # Rollback in case there is any error
		   db.rollback()
		   wikiAns=[]
		   wikiAnsGot=True
		   return

		# disconnect from server
		# NOTE(review): this looks unreachable -- every path through the try/except
		# above returns -- so the MySQL connection is never closed explicitly.
		db.close()
	def waitForAns():
		global wikiAnsGot,lis
		# Busy-wait: spins without sleeping until wikiAnsGot becomes true.
		while not wikiAnsGot:
			pass
		if len(wikiAns)==0:
			pass
		else:
			# Remember the first answer for this query.
			addToCache(lis,wikiAns[0])
	q=" ".join(lis1)#passing org query
	# NOTE(review): presumably Process is multiprocessing.Process -- confirm. If so,
	# wikiAns/wikiAnsGot assigned inside the children are not visible in this process,
	# so the wikiAns returned below would be whatever the parent's global already held.
	global th1,th2
	th1=Process(target=checkInCache,args=(q,))
	th1.start()
	th2=Process(target=wikiMain,args=())
	th2.start()
	tt=Process(target=waitForAns,args=())
	tt.start()
	tt.join()
	return wikiAns
    #below portion is shared by both news and general
    db = client[dbase]
    # Fetch every document whose "keyword" is one of the query words, dropping the
    # "keyword" and "_id" fields from the returned documents.
    # NOTE(review): uses the global name lis, not the lis1 parameter -- confirm.
    results=db[coll].find({"keyword":{"$in":lis}},{"keyword":0,"_id":0})
    listCounters = list(results)
    if dbase  == "news" :
        #print len(listCounters),len(lis),lis
        # Only answer when as many documents came back as there are query words;
        # otherwise check stays 0 and the function returns [] below.
        if len(lis) == len(listCounters):
            #check =1
            #print list(getNewContent(listCounters))
            return list(getNewContent(listCounters))
    else:
        check =1
    
    result=SumDict({})
    if check ==1:
    
        for listCounter in listCounters:
            result.merge(listCounter)
    #print result
        # Keys of the merged dict ordered by their value, highest first.
        sor=sorted(result, key=result.get, reverse=True)
        #print sor
        return sor
    else:
        return []
import urllib2 as u
import time
import parser
import threading

from pymongo import MongoClient as mc

# MongoDB handles: MongoClient() with pymongo's default host/port, database
# "news", collection "bhaskar".
db = mc()
db = db["news"]
col = db["bhaskar"]


# Crawl start date, stored as seconds since the epoch (via time.mktime).
# The file is read through a context manager so the handle is closed right
# away, and the text is stripped because a trailing newline would make
# time.strptime raise "unconverted data remains".
with open("./crawlers/bhaskar/start_date.txt","r") as _start_date_file:
	START_DATE = time.strptime(_start_date_file.read().strip(), "%Y-%m-%d")
START_DATE = time.mktime(START_DATE)
BASE_URL = "http://www.bhaskar.com/archives/"

def main():
	"""Print the archive URL for each day after START_DATE up to the current time.

	Dates are produced by stepping forward from START_DATE in 24-hour
	increments and formatting each step as a local-time YYYY-MM-DD string.
	"""
	one_day = 3600*24
	origin = int(START_DATE)
	# how many 24-hour steps fit between START_DATE and now
	elapsed = ( int(time.time()) - origin ) / one_day
	dates = [
		time.strftime("%Y-%m-%d", time.localtime(origin + one_day*step))
		for step in range(1, elapsed + 1)
	]

	# build and print the archive URL for every generated date
	for day in dates:
		fetch_url = BASE_URL+day+"/"
		print(fetch_url)
Beispiel #41
0
from pymongo import MongoClient as mc
import numpy as np
from bson.binary import Binary
import pickle

# Connect to the MongoDB server on localhost.
myclient = mc("mongodb://localhost:27017/")

db = myclient["faceszakh"]

col = db["faces"]

# MongoDB only creates a database/collection on the first write, so insert a
# placeholder document and remove it again. The placeholder is deleted by the
# _id that insert_one returned rather than by name, so an existing real
# document that happens to be named "Deleteme" can never be removed instead.
_placeholder = col.insert_one({"name": "Deleteme"})
col.delete_one({"_id": _placeholder.inserted_id})

print('db created')
print(myclient.list_database_names())
Beispiel #42
0
def Connection():
    """Return the 'Contests' collection of the 'Loterias' database on localhost:27017."""
    return mc('localhost', 27017)['Loterias']['Contests']
Beispiel #43
0
 def __init__(self):
     """Open a MongoDB client and bind the configured database and collection.

     Host, port, database name and collection name are taken from
     MONGO_HOST, MONGO_PORT, MONGO_DB and MONGO_COLLECTION, which are
     defined outside this method.
     """
     self.host, self.port = MONGO_HOST, MONGO_PORT
     self.client = client = mc(self.host, self.port)
     self.db = database = client[MONGO_DB]
     self.collection = database[MONGO_COLLECTION]
Beispiel #44
0
def getMongo():
	"""Return the "iris" database from a client built on config["mongo_uri"]."""
	global config
	from pymongo import MongoClient as mc
	client=mc(config["mongo_uri"])
	return client["iris"]