def teardown_databases(self, db_name, **kwargs):
    """Drop the MongoDB test database ``db_name``.

    Opens a connection with pymongo's default host/port, drops the database,
    releases the connection and prints a notice.

    Args:
        db_name: Name of the database to drop.
        **kwargs: Accepted for signature compatibility; not used here.
    """
    from pymongo import Connection

    conn = Connection()
    try:
        conn.drop_database(db_name)
    finally:
        # The connection is only needed for the drop; release its sockets
        # even when the drop fails so repeated test runs do not leak them.
        conn.disconnect()
    # A parenthesised single-argument print emits the same text on Python 2
    # and also parses on Python 3.
    print('Dropping test-database: ' + db_name)
# Settings for logfile, and yararules file are also located in the midas-settings.cfg file. import ConfigParser, exiftool, os, shutil, sys, hashlib, datetime, time, argparse, logging, pymongo, json, urllib, urllib2 from multiprocessing import Pool # Import DB Config from midas-settings.cfg config = ConfigParser.SafeConfigParser() config.read("midas-settings.cfg") dbserver = config.get('midasdb', 'server') dbport = int(config.get('midasdb', 'port')) dbdb = config.get('midasdb', 'db') dbcoll = config.get('midasdb', 'collection') # Database Connection Information from pymongo import Connection metadatacollection = Connection(dbserver, dbport)[dbdb][dbcoll] # Argument Parser and Usage Help parser = argparse.ArgumentParser( description='Metadata Inspection Database Alerting System') parser.add_argument('Path', help='Path to directory of files to be scanned (Required)') parser.add_argument( '-d', '--delete', action='store_true', help= 'Deletes files after scanning and extracting metadata (Default: False)', required=False) parser.add_argument( '-m',
print("PASSED TEST STRESSED ROLBACK") def abort_test(self): print("TEST FAILED") sys.exit(1) if __name__ == '__main__': os.system('rm config.txt; touch config.txt') parser = OptionParser() #-m is for the main address, which is a host:port pair, ideally of the #mongos. For non sharded clusters, it can be the primary. parser.add_option("-m", "--main", action="store", type="string", dest="main_addr", default="27217") (options, args) = parser.parse_args() PORTS_ONE['MONGOS'] = options.main_addr s = DocManager('localhost:30000') s._remove() start_cluster() conn = Connection('localhost:' + PORTS_ONE['MONGOS'], replicaSet="demo-repl") print("STARTING TESTS") unittest.main(argv=[sys.argv[0]]) print("Done with tests") clean_up()
if record.has_key('favitemcount') and record.has_key('favshopcount') \ and record.has_key('indirectpay') and record.has_key('directpay') \ and record.has_key('indirectpaycount') and record.has_key('directpaycount'): summary_rpt_dict[campaign_id] = { 'fav': record['favitemcount'] + record['favshopcount'], 'pay': record['indirectpay'] + record['directpay'], 'paycount': record['indirectpaycount'] + record['directpaycount'] } #for key in summary_rpt_dict.keys(): # summary_rpt_dict[key]['impressions'] /=3 # summary_rpt_dict[key]['click'] /=3 # summary_rpt_dict[key]['cost'] /=3 #print "ssss:", summary_rpt_dict return summary_rpt_dict if __name__ == '__main__': MONGODB_HOST = 'app.maimiaotech.com' MONGODB_PORT = 2006 from pymongo import Connection mongoConn = Connection(host=MONGODB_HOST, port=MONGODB_PORT) rpt_effect_db = RptEffectDB(mongoConn, '57620080') rpt_effect_db.save_records([{'a': 1, 'b': 3}, {'a': 5, 'b': 9}])
def save_to_mongo(data, collection):
    """Insert ``data`` into ``collection``.

    Args:
        data: Whatever ``collection.insert`` accepts.
        collection: Object exposing an ``insert(data)`` method.

    Returns:
        The value returned by ``collection.insert(data)``.
    """
    return collection.insert(data)


#
# Program entry point
#
if __name__ == "__main__":
    # Parse CLI arguments
    cli_parser = build_cli_arguments()
    args = cli_parser.parse_args()
    filename = args.file

    # Create DB connection
    mongo = Connection(args.host, args.port)
    try:
        db = mongo[args.database]
        # NOTE(review): ``coll`` is not passed to process_loop; presumably it
        # is read there as a module global -- confirm against process_loop.
        coll = db[args.collection]

        # Use one file loader or another depending on the extension.  Both
        # callables are used as context managers with their default mode.
        opener = gzip.open if filename.endswith(".gz") else open
        with opener(filename) as f:
            process_loop(f, args.batch_size)
    finally:
        # Release the connection even when loading fails part-way through.
        mongo.disconnect()
import sys
import optparse
from collections import Counter, namedtuple
from operator import itemgetter, mul, attrgetter
import multiprocessing
import colorsys
from PIL import Image as Im
from PIL import ImageChops, ImageDraw
from colormath.color_objects import RGBColor
import requests
from StringIO import StringIO
from pymongo import Connection

# Module-level MongoDB handle: created at import time against
# localhost:27017, with the ``nSquared`` database selected.
connection = Connection('localhost', 27017)
db = connection.nSquared
# Collection name; presumably used with ``db`` by code outside this excerpt.
COLLECTION = 'thumbs'

# Lightweight records: a colour value with its prominence, and a palette
# made of a colour list plus a background colour.
Color = namedtuple('Color', ['value', 'prominence'])
Palette = namedtuple('Palette', 'colors bgcolor')

# RGB triples.
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)

# algorithm tuning
N_QUANTIZED = 100  # start with an adaptive palette of this size
MIN_DISTANCE = 10.0  # min distance to consider two colors different
MIN_PROMINENCE = 0.01  # ignore if less than this proportion of image
MIN_SATURATION = 0.05  # ignore if not saturated enough
MAX_COLORS = 5  # keep only this many colors
def connect(self):
    """Connect to MongoDB at ``self.host`` and select database ``self.db``.

    The connection and the database handle are stored on the instance, then
    an info-level message naming the host is logged.
    """
    client = Connection(host=self.host)
    self.__con = client
    self.__db = client[self.db]
    log.info('[%s] mongodb connected.' % self.host)
def get_all_ids_from_db(collection_name):
    """Return the ``_id`` of every document in ``jd.<collection_name>``.

    Args:
        collection_name: Name of the collection inside the ``jd`` database.

    Returns:
        A list with one ``_id`` per document, in cursor order.
    """
    collection = Connection().jd[collection_name]
    # Project only ``_id`` so full documents are not transferred.
    cursor = collection.find({}, {'_id': 1})
    return [doc['_id'] for doc in cursor]
def run(self):
    """Discover the mongo cluster and start an OplogThread for each primary.

    Connects to ``self.address``, authenticates against ``admin`` when
    ``self.auth_key`` is set, and reads the saved oplog progress.  The
    ``isdbgrid`` admin command is used as a probe: when it raises
    ``OperationFailure`` the deployment is handled as a single replica set,
    otherwise as a sharded cluster.

    In both modes the method keeps looping while ``self.can_run`` holds,
    writing oplog progress between one-second sleeps.  When a started thread
    reports ``running`` as false, all threads are joined, the doc manager is
    stopped and the method returns early.
    """
    main_conn = Connection(self.address)
    if self.auth_key is not None:
        main_conn['admin'].authenticate(self.auth_username, self.auth_key)
    self.read_oplog_progress()
    conn_type = None

    # Probe: a failing "isdbgrid" marks the deployment as a replica set.
    try:
        main_conn.admin.command("isdbgrid")
    except pymongo.errors.OperationFailure:
        conn_type = "REPLSET"

    if conn_type == "REPLSET":
        # Non-sharded configuration: one OplogThread, stored under key 0.
        oplog_coll = main_conn['local']['oplog.rs']

        prim_admin = main_conn.admin
        repl_set = prim_admin.command("replSetGetStatus")['set']

        oplog = oplog_manager.OplogThread(
            main_conn, (main_conn.host + ":" + str(main_conn.port)),
            oplog_coll, False, self.doc_manager, self.oplog_progress,
            self.ns_set, self.auth_key, self.auth_username,
            repl_set=repl_set)
        self.shard_set[0] = oplog
        logging.info('MongoConnector: Starting connection thread %s' %
                     main_conn)
        oplog.start()

        # Supervise the single thread until asked to stop.
        while self.can_run:
            if not self.shard_set[0].running:
                logging.error("MongoConnector: OplogThread"
                              " %s unexpectedly stopped! Shutting down" %
                              (str(self.shard_set[0])))
                self.oplog_thread_join()
                self.doc_manager.stop()
                return

            self.write_oplog_progress()
            time.sleep(1)

    else:  # sharded cluster
        while self.can_run is True:
            # Re-read config.shards on every pass so that shards without an
            # entry in self.shard_set get a thread started for them.
            for shard_doc in main_conn['config']['shards'].find():
                shard_id = shard_doc['_id']
                if shard_id in self.shard_set:
                    # Known shard: only check that its thread is still alive.
                    if not self.shard_set[shard_id].running:
                        logging.error(
                            "MongoConnector: OplogThread"
                            " %s unexpectedly stopped! Shutting down" %
                            (str(self.shard_set[shard_id])))
                        self.oplog_thread_join()
                        self.doc_manager.stop()
                        return

                    # Note: this sleeps once per known shard, not once per
                    # pass over the shard list.
                    self.write_oplog_progress()
                    time.sleep(1)
                    continue

                # New shard: its 'host' field must split on '/' into exactly
                # two parts (replica set name, host list); anything else
                # raises ValueError and aborts the connector.
                try:
                    repl_set, hosts = shard_doc['host'].split('/')
                except ValueError:
                    cause = "The system only uses replica sets!"
                    logging.error("MongoConnector: %s", cause)
                    self.oplog_thread_join()
                    self.doc_manager.stop()
                    return

                shard_conn = Connection(hosts, replicaset=repl_set)
                oplog_coll = shard_conn['local']['oplog.rs']
                oplog = oplog_manager.OplogThread(
                    shard_conn, self.address, oplog_coll, True,
                    self.doc_manager, self.oplog_progress, self.ns_set,
                    self.auth_key, self.auth_username)
                self.shard_set[shard_id] = oplog
                msg = "Starting connection thread"
                logging.info("MongoConnector: %s %s" % (msg, shard_conn))
                oplog.start()

    # Reached once can_run turns false: wait for the worker threads.
    self.oplog_thread_join()
from pymongo import Connection #host = '172.16.1.50' host = '127.0.0.1' db = Connection(host).wendui def main(): for n_msg in db.n_msg.find(): nmsg_id = n_msg['_id'] print nmsg_id n_msg['fsend_id'] = n_msg.get('op_sender', None) n_msg['deleted'] = False n_msg['status'] = 'UNREAD' parts = n_msg['partitions'] for i in range(len(parts)): part = parts[i] n_msg['domain_id'] = part['_id'] n_msg['target_id'] = parts[i ^ 1]['_id'] n_msg['session_id'] = n_msg['domain_id'] + ':' + n_msg['target_id'] if part['role'] == 'SENDER': n_msg['type'] = 'SEND' n_msg['_id'] = nmsg_id + '-reverse' else: n_msg['type'] = 'RECEIVE' n_msg['_id'] = nmsg_id db.message.insert(n_msg) if not db.message_session.find_one({'_id': n_msg['session_id']}):
for index, user in enumerate(collection.find()): features += user[key] bar.draw(index + 1) features = sorted(Counter(features).items(), key=lambda d: d[1], reverse=True) fout = open('./features/review.feature', 'w') for f in features: fout.write('%s %d\n' % (f[0].encode('utf8'), f[1])) if __name__ == '__main__': #construct_data_set('user') #construct_data_set('product') #output_graph() #construct_train_user() #construct_train_product() #construct_test_user() #attribute_statistics('gender') #attribute_statistics('age') #attribute_statistics('location') #attribute_statistics('kids') #output_all_features() #output_user_product_graph() #insert_LINE_vector() insert_review(Connection().jd.train_users, RAW_DATA_DIR + 'user_review.data') #insert_review(Connection().jd.test_users,RAW_DATA_DIR+'test_user_review.data') #output_features(base_dir+'/features/review.feature','review') print 'Done'
import string
import sys

from pymongo import Connection
from pymongo.errors import ConnectionFailure
import tweetstream


def normalize(s):
    """Return ``s`` with every character of ``string.punctuation`` removed."""
    ret = s
    for p in string.punctuation:
        ret = ret.replace(p, '')
    return ret


# Connect to MongoDB
try:
    c = Connection(host="localhost", port=27017)
except ConnectionFailure as e:
    # ``sys`` was referenced here without being imported, so a failed
    # connection used to end in a NameError instead of this message.
    sys.stderr.write("Could not connect to MongoDB: %s\n" % e)
    sys.exit(1)

db = c['nba_tweets']

# One query term per line; a set gives constant-time membership tests.
with open('nba_queries.txt') as f:
    queries = set(line.strip() for line in f)

with tweetstream.SampleStream('username', 'pass') as stream:
    for tweet in stream:
        # Skip stream items with a missing, null or empty text field.
        text = tweet.get('text')
        if not text:
            continue
        # Store the tweet when any whitespace-separated token, lower-cased,
        # is one of the query terms.
        if any(word.lower() in queries for word in text.split()):
            print(text)
            db.tweets.insert(tweet)
import json import subprocess import sys if len(sys.argv) <= 1: print 'usage: populate.py [es/mongo]' sys.exit(1) es_or_mongo = sys.argv[1] mongo = es_or_mongo == 'mongo' inserts = [] if mongo: MONGO_URI = 'localhost' c = Connection(MONGO_URI) mongo_db = c['greenbook'] collection = mongo_db['edict'] else: ELASTICSEARCH_URI = 'localhost:9200' es = rawes.Elastic(ELASTICSEARCH_URI) try: print "Dropping index if exist..." # drop existing index es.delete('edict') except: print "No pre-existing index found" print "Creating a new index..." mapping = {
# Crappy PAG (Process And GUI) visualizer: prints every phase of the given
# session's process trace whose name contains 'monitor'.

from pymongo import Connection, ASCENDING, DESCENDING

import sys
sys.path.insert(0, '../../GUItracing/')
sys.path.insert(0, '../../SystemTap/')

from parse_gui_trace import DesktopState

# The session tag to inspect is the first command-line argument.
session_name = sys.argv[1]

c = Connection()
db = c.burrito_db

proc_col = db.process_trace
gui_col = db.gui_trace

# Fetch only the phase names, oldest document first.
query = {'session_tag': session_name}
projection = {'phases.name': 1}
cursor = proc_col.find(query, projection, sort=[('_id', ASCENDING)])

for dat in cursor:
    for p in dat['phases']:
        name = p['name']
        # Skip phases with an empty name or one that lacks 'monitor'.
        if not name or 'monitor' not in name:
            continue
        print(p)
def save(record):
    """Save ``record`` into the ``global`` collection of the module-level ``db``.

    ``db`` is bound by the entry point below, so this function is only
    usable after that has run.
    """
    db['global'].save(record)


if __name__ == "__main__":
    # argv layout: script, username, password, url, then one or more CSV
    # files.  The old ``!= 5`` check rejected the multi-file form that the
    # usage line advertises, and it did not stop the script afterwards.
    if len(sys.argv) < 5:
        print("Usage: %s <username> <password> <url> <csv-files>" % sys.argv[0])
        sys.exit(1)

    username = sys.argv[1]
    password = sys.argv[2]
    url = sys.argv[3]
    file_list = sys.argv[4:]

    print(username)
    # The password is deliberately no longer echoed: it would end up in
    # terminal scrollback and captured logs.
    print(len(file_list))

    if len(url) == 0:
        connection = Connection()       # Connect to localhost
    else:
        connection = Connection(url)    # Connect to remote db

    db = Database(connection, 'zip')     # Get zip database
    db.authenticate(username, password)  # Authenticate

    for csv_file in file_list:           # Add all the files
        add(csv_file)
def edit_card(request, html_key):
    """Show the card-writing form pre-filled from a stored card; save edits.

    Loads the document whose ``_id`` equals ``html_key`` from the
    ``yaoqingka.cards`` collection and copies its fields onto a fresh
    ``InviteCard``.  When ``request.POST`` is non-empty, a new document is
    built from the posted fields under a newly generated 20-character key,
    inserted, and the client is redirected to
    ``/yqk/card/show/<key>/?id=<template_card_id>``.  Otherwise the
    ``yaoqingka/write_card.html`` template is rendered.

    NOTE(review): the template receives ``locals()``, so every local name
    in this function is visible to the template; renaming or removing a
    local can break rendering.
    """
    template_card_id = request.GET.get("id", 0)
    # NOTE(review): a new connection is opened per request and never closed
    # in this function.
    mongoconn = Connection('182.92.163.118', 27017)
    cars_db = mongoconn['yaoqingka']["cards"]
    # NOTE(review): if no document matches, ``item`` would be None and the
    # subscripts below would raise -- confirm callers guarantee the key.
    item = cars_db.find_one({'_id': html_key})
    card = InviteCard()
    card.id = 0
    card.recipient = item['recipient']
    card.content = item['content'].replace('\n', '</br>')
    # NOTE(review): as written this replaces a space with a space; possibly
    # an HTML entity was lost from one of the literals -- confirm.
    card.content = card.content.replace(' ', ' ')
    card.addressor = item['addressor']
    # Strip the static-host prefix so only the relative image paths remain.
    image = item['card_image'].replace('http://qstatic.zuimeia.com/img/', '')
    image_top = item['image_top'].replace('http://qstatic.zuimeia.com/img/', '')
    image2 = item['card_image2'].replace('http://qstatic.zuimeia.com/img/', '')
    share_image = item['share_image'].replace(
        'http://qstatic.zuimeia.com/img/', '')
    card.card_image = image
    card.image_top = image_top
    card.card_image2 = image2
    card.share_image = share_image
    card.word_postion = item['word_postion']
    card.word_color = item['word_color']
    card.bg_color = item['bg_color']
    card.title_color = item['title_color']
    card.meeting_time = item['meeting_time']
    card.meeting_location = item['meeting_location']
    card.title = item['title']
    card.id = item['template_card_id']
    # Turn the markup back into plain text: drop <p> tags and convert the
    # '</br>' markers (inserted above) back into newlines.
    card.content = card.content.replace('<p>', '')
    card.content = card.content.replace('</p>', '')
    card.content = card.content.replace('</br>', '\n')
    card.content = card.content.replace(' ', ' ')

    if request.POST:
        print request.POST
        recipient = request.POST.get('recipient')
        content = request.POST.get('content')
        addressor = request.POST.get('addressor')
        card_image = request.POST.get('card_image')
        image_top = request.POST.get('image_top')
        card_image2 = request.POST.get('card_image2')
        share_image = request.POST.get('share_image')
        word_postion = request.POST.get('word_postion')
        meeting_time = request.POST.get('meeting_time')
        meeting_location = request.POST.get('meeting_location')
        word_color = request.POST.get('word_color')
        bg_color = request.POST.get('bg_color')
        title_color = request.POST.get('title_color')
        template_card_id = request.POST.get('template_card_id')
        title = request.POST.get('title')

        # Save the content under a newly generated unique page key and
        # store it in MongoDB.  The key is 20 characters sampled without
        # replacement from lower-cased letters and digits, using the
        # ``random`` module.
        chars = (string.ascii_letters + string.digits).lower()
        html_key = ''.join(random.sample(chars, 20))
        item = {}
        item['_id'] = html_key
        item['recipient'] = recipient
        item['content'] = content
        item['addressor'] = addressor
        item['card_image'] = card_image
        item['image_top'] = image_top
        item['card_image2'] = card_image2
        item['share_image'] = share_image
        item['word_postion'] = word_postion
        item['meeting_time'] = meeting_time
        item['meeting_location'] = meeting_location
        item['word_color'] = word_color
        item['bg_color'] = bg_color
        item['title_color'] = title_color
        item['template_card_id'] = template_card_id
        item['title'] = title
        item['created_at'] = datetime.now()
        cars_db.insert(item)
        # NOTE(review): int() raises when 'template_card_id' is missing
        # from the POST data or is not numeric.
        return HttpResponseRedirect('/yqk/card/show/%s/?id=%d' %
                                    (html_key, int(template_card_id)))

    edit_template = False
    return render_to_response('yaoqingka/write_card.html', locals(),
                              context_instance=RequestContext(request))
def test_rollback(self):
    """Test rollback in oplog_manager; fails by assertion if it is wrong.

    A rollback is forced by inserting a doc, killing the primary, inserting
    another doc, killing the new primary, and then restarting both.  Both
    docs are then upserted into the doc manager and ``rollback()`` is
    expected to leave only the first one ('paulie').
    """
    # Start from an empty config file and a fresh cluster.
    os.system('rm config.txt; touch config.txt')
    start_cluster()
    test_oplog, primary_conn, mongos, oplog_coll = self.get_new_oplog()
    solr = DocManager()
    test_oplog.doc_manager = solr
    solr._delete()  # equivalent to solr.delete(q='*: *')
    obj1 = ObjectId('4ff74db3f646462b38000001')

    # Step 1: insert the first doc and wait until it is visible via mongos.
    mongos['test']['test'].remove({})
    mongos['test']['test'].insert({
        '_id': obj1,
        'name': 'paulie'
    }, safe=True)
    while (mongos['test']['test'].find().count() != 1):
        time.sleep(1)
    # Oplog position after the first insert; the surviving doc must not be
    # newer than this.
    cutoff_ts = test_oplog.get_last_oplog_timestamp()

    obj2 = ObjectId('4ff74db3f646462b38000002')
    first_doc = {
        'name': 'paulie',
        '_ts': bson_ts_to_long(cutoff_ts),
        'ns': 'test.test',
        '_id': obj1
    }

    # Step 2: kill the primary and wait for the secondary to become master.
    killMongoProc(primary_conn.host, PORTS_ONE['PRIMARY'])

    new_primary_conn = Connection('localhost', int(PORTS_ONE['SECONDARY']))

    admin = new_primary_conn['admin']
    while admin.command("isMaster")['ismaster'] is False:
        time.sleep(1)
    time.sleep(5)

    # Step 3: insert the second doc, retrying once a second; after more
    # than 60 failed attempts the whole process exits.
    count = 0
    while True:
        try:
            current_conn = mongos['test']['test']
            current_conn.insert({'_id': obj2, 'name': 'paul'}, safe=True)
            break
        except:
            count += 1
            if count > 60:
                string = 'Call to insert doc failed too many times'
                logging.error(string)
                sys.exit(1)
            time.sleep(1)
            continue
    while (mongos['test']['test'].find().count() != 2):
        print(mongos['test']['test'].find().count())
        time.sleep(1)

    # Step 4: kill the new primary, then bring the original primary back.
    killMongoProc(primary_conn.host, PORTS_ONE['SECONDARY'])
    startMongoProc(PORTS_ONE['PRIMARY'], "demo-repl", "/replset1a",
                   "/replset1a.log", None)

    # wait for master to be established
    while primary_conn['admin'].command("isMaster")['ismaster'] is False:
        time.sleep(1)

    startMongoProc(PORTS_ONE['SECONDARY'], "demo-repl", "/replset1b",
                   "/replset1b.log", None)

    # wait for secondary to be established (polls until myState == 2)
    admin = new_primary_conn['admin']
    while admin.command("replSetGetStatus")['myState'] != 2:
        time.sleep(1)
    # Only the first doc should be left once the second insert is rolled
    # back on the server side.
    while retry_until_ok(mongos['test']['test'].find().count) != 1:
        time.sleep(1)

    self.assertEqual(str(new_primary_conn.port), PORTS_ONE['SECONDARY'])
    self.assertEqual(str(primary_conn.port), PORTS_ONE['PRIMARY'])

    last_ts = test_oplog.get_last_oplog_timestamp()
    second_doc = {
        'name': 'paul',
        '_ts': bson_ts_to_long(last_ts),
        'ns': 'test.test',
        '_id': obj2
    }

    # Step 5: put both docs in the doc manager, then roll back.
    test_oplog.doc_manager.upsert(first_doc)
    test_oplog.doc_manager.upsert(second_doc)

    test_oplog.rollback()
    test_oplog.doc_manager.commit()
    results = solr._search()

    assert (len(results) == 1)

    results_doc = results[0]
    self.assertEqual(results_doc['name'], 'paulie')
    self.assertTrue(results_doc['_ts'] <= bson_ts_to_long(cutoff_ts))

    #test_oplog.join()
    print("PASSED TEST ROLLBACK")
import os import unittest import datetime import json from models.BaseModel import BaseModel from models.EntityManager import EntityManager from pymongo import Connection import settings from models import Logger conn = Connection(settings.DBHOST, settings.DBPORT) _DBCON = conn.test class ChildModel(BaseModel): def __init__(self, DBCON, _id=None): self.fields = [ ('title', None), ('content', None), ('sort_field', None), ('tags', []), ('added', datetime.datetime.now()), ] self.init(DBCON, _id) class EntityManagerTest(unittest.TestCase): def setUp(self):
def connect_mongodb():
    """Return the ``wordnet`` database from MongoDB at localhost:27017."""
    return Connection('localhost', 27017).wordnet
# python 1.py tools/libxl/libxl.c libxl 372 import sys from pymongo import Connection connect = Connection('127.0.0.1', 27017) db = connect.satpg collect = db.satpg argvs = sys.argv argc = len(argvs) f = open(argvs[1]) line = f.readline() print "digraph sample {" print "START[color=red penwidth=5];" print "EMPTY[shape=box color=red penwidth=5];" while line: line2 = line.strip("*").replace("*","") tmp = line2.split(",") try: ret = collect.find({"src" : tmp[2]}) for doc in ret:
import jieba.posseg as pseg
import codecs
from gensim import corpora, models, similarities
import mysql.connector
from pymongo import Connection

# Module-level MongoDB handles, created at import time: the server on
# localhost:21001, its ``noonde_api_development`` database, and that
# database's ``no_sql_airbnb_listings`` collection.
con = Connection('localhost', 21001)
db = con.noonde_api_development
col = db.no_sql_airbnb_listings
# timeRes = timeReg.search(fLine) # fileTime = 0 # if int(timeRes.group(2)) >= 30: # Only checking every half hour # fileTime = 30 # smallName = setFilename(oitName, timeRes) # smallFile = open(smallName, 'w') # smallFile.write(fLine) ######################################### # Starting MongoDB ######################################### # client = MongoClient('localhost', 27017) client = Connection() odb = client['oit-db'] cl_list = odb.collection_names() if len(cl_list) != 0: # if "oit_trace" in cl_list: # odb.drop_collection('oit_trace') if "oit_transition" in cl_list: odb.drop_collection('oit_transition') cl_general = odb['oit_trace'] cl_transition = odb['oit_transition'] kReg = re.compile(r"KERNEL")
# Define your item pipelines here # # Don't forget to add your pipeline to the ITEM_PIPELINES setting # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html from scrapy.exceptions import DropItem # from .items import JpAvItemId from pymongo import Connection MongoCon = Connection('localhost', 27017) # the default is 27017 MiscDB = MongoCon.misc import urllib SPEC_DB = ['tb_user', 'tb_thread', 'tb_fidfname', 'wb_userstatus', 'jp_av'] class MongoPipeline(object): def __init__(self): self.seenDict = dict() self.seenDict['jp_av'] = [ i['slug'] for i in MongoCon.jp_av.items.find() ] def process_item(self, item, spider): if hasattr(spider, 'alias'): alias = urllib.unquote(spider.alias) if spider.name == 'jp_av': if item['slug'] in self.seenDict['jp_av']: raise DropItem('Duplicate item found: %s' % item) # alias = urllib.unquote(spider.slug) MongoCon.jp_av.items.insert(dict(item))
from pymongo import Connection import sys connection = Connection("localhost", 27017) class Id: def __init__(self, name): self.name = name def __str__(self): return "<{}>".format(self.name) class Tripple: def __init__(self, in_, relation, out): self.in_ = in_ if not isinstance(relation, Id): relation = Id(relation) self.relation = relation self.out = out def _format(self, node): if isinstance(node, Id): return str(node) x = str(node) x = x.replace("\\", "\\\\") if "\\" in x else x x = x.replace('"', '\\"') if '"' in x else x
def test2():
    """Connect to MongoDB on ``localhost`` and select the ``sm`` database.

    NOTE(review): ``db`` is not used in the visible part of this function;
    the excerpt may be cut short -- confirm before removing anything.
    """
    from pymongo import Connection
    connection = Connection('localhost')
    db = connection.sm
def __init__(self,
             directory='./',
             host='localhost',
             db='monfs',
             collection='objects'):
    """Connect to MongoDB on ``host`` and read ``directory``.

    Args:
        directory: Path handed to ``self.readDir``.
        host: Host passed to ``Connection``.
        db: Stored unchanged as ``self.db``.
        collection: Stored unchanged as ``self.collection``.
    """
    self.conn = Connection(host)
    self.db, self.collection = db, collection
    self.readDir(directory)
# Tally yesterday's events per repository, split by whether the actor owns
# the repository.
for line in cur.fetchall():
    actor, repo_name, event_type = line
    # Keep only the part before the last '/' (presumably "<owner>/<repo>").
    # The unindexed rsplit() result is a list, which never equals the actor
    # string, so every event used to be counted as "Other_".
    owner = repo_name.rsplit('/', 1)[0]
    # if not repo_dict[repo_name].get("yesterday"):
    #     repo_dict[repo_name]['yesterday'] = {"Owner_push": 0, "Owner_issue": 0, "Owner_pullrequest": 0, "Other_star": 0, "Other_pullrequest": 0, "Other_issue": 0}
    key_prefix = "Owner_" if owner == actor else "Other_"
    cur_item = repo_dict[repo_name]['yesterday']
    # Pushes are always booked to the owner and stars to others; issues and
    # pull requests go to whichever side the actor is on.
    if event_type == 'PushEvent':
        cur_item["Owner_push"] += 1
    if event_type in ['IssuesEvent', 'IssueCommentEvent']:
        cur_item[key_prefix + 'issue'] += 1
    if event_type in ['PullRequestEvent', 'PullRequestReviewCommentEvent']:
        cur_item[key_prefix + 'pullrequest'] += 1
    if event_type == 'WatchEvent':
        cur_item["Other_star"] += 1

# Save to mongodb
# NOTE(review): ``client`` is not used by the insert below; it is left in
# place in case code outside this excerpt relies on it.
client = MongoClient('localhost', 27017)
curation_collection = Connection()['curation']['curation_profile']
for k, v in repo_dict.iteritems():
    # Store the repo_dict key on the document under 'url' before inserting.
    v['url'] = k
    curation_collection.insert(v)
del curation_collection
from pymongo import Connection from queue import Empty, Queue from time import time from xmlrpc.server import SimpleXMLRPCServer import os import sys sys.path.append(os.getcwd()) from dmut.common.util import log db = Connection()["mutants"] mutants = db.fs.files q = Queue() log("Populating queue with mutants.") for mutant in mutants.find({"killed": False}): q.put(str(mutant["_id"])) log("Queue populated. Ready to serve.") start = time() def get(): try: return q.get() except Empty:
import csv
import pymongo
import MySQLdb, MySQLdb.cursors
from pymongo import Connection
import urllib

# String labels.  The names suggest value-set and RxNorm term-type names;
# their use lies outside this excerpt -- confirm there.
LOINC_TOP2K_VALUE_SET = "LOINC Top 2000"
VAKP_VALUE_SET = "SNOMED VA/KP Problem List"
CORE_PROBLEM_LIST_VALUE_SET = "SNOMED CORE Problem List"
GENERIC_CLINICAL_DRUG = "RxNorm Generic Clinical Drug"
BRANDED_CLINICAL_DRUG = "RxNorm Branded Clinical Drug"
BRAND_NAME = "RxNorm Brand Name"
GENERIC_NAME = "RxNorm Generic Name"

# Default MongoDB connection; works on the ``vocab.concepts`` collection.
connection = Connection()
db = connection.vocab
concepts = db.concepts

# Clear all value set flags: every document whose 'valueSets' is not an
# empty array is written back in full with 'valueSets' reset to [].
for code in concepts.find({'valueSets': {'$not': {'$size': 0}}}):
    code['valueSets'] = []
    concepts.update({'_id': code['_id']}, code)

print "Fetching codes from LOINC.org"
# Open the CSV over HTTP and wrap the response in a DictReader, which
# yields one dict per row.
traw = urllib.urlopen(
    'http://loinc.org/usage/obs/loinc-top-2000-plus-loinc-lab-observations-us.csv'
)
loincReader = csv.DictReader(traw)
# Give numeric levels to courts # 5 = US Supreme Court # 4 = US Courts of Appeals # 3 = US District Courts # 2 = State Courts # 0 = Unknown or Misc lower from pymongo import Connection # Make a connection to Mongo. try: db_conn = Connection() # db_conn = Connection("emo2.trinity.duke.edu", 27017) except ConnectionFailure: print "couldn't connect: be sure that Mongo is running on localhost:27017" sys.exit(1) db = db_conn['fashion_ip'] total_docs = db.docs.find().count() for count, doc in enumerate(db.docs.find({}, {'_id': True, 'court': True})): if count % 100 == 0: print count, '/', total_docs level = 0 court = doc['court'] # Order is important here... if court.startswith('Supreme Court of United States'): level = 5