Example #1
class EventsTestMixin(object):
    """
    Helpers and setup for running tests that evaluate events emitted
    """
    def setUp(self):
        super(EventsTestMixin, self).setUp()
        self.event_collection = MongoClient()["test"]["events"]
        self.event_collection.drop()
        self.start_time = datetime.now()

    def assert_event_emitted_num_times(self, event_name, event_time, event_user_id, num_times_emitted):
        """
        Tests the number of times a particular event was emitted.
        :param event_name: Expected event name (e.g., "edx.course.enrollment.activated")
        :param event_time: Latest expected time, after which the event would fire (e.g., the beginning of the test case)
        :param event_user_id: user_id expected in the event
        :param num_times_emitted: number of times the event is expected to appear since the event_time
        """
        self.assertEqual(
            self.event_collection.find(
                {
                    "name": event_name,
                    "time": {"$gt": event_time},
                    "event.user_id": int(event_user_id),
                }
            ).count(), num_times_emitted
        )
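A usage sketch for this mixin (hedged: the test class, the trigger step, and the user id are illustrative assumptions, not part of the original):

import unittest

class EnrollmentEventsTest(EventsTestMixin, unittest.TestCase):
    def test_activation_event_emitted_once(self):
        # ... exercise the system under test so it emits the event ...
        self.assert_event_emitted_num_times(
            "edx.course.enrollment.activated",  # event name from the docstring above
            self.start_time,                    # recorded in EventsTestMixin.setUp
            42,                                 # illustrative user_id
            1,
        )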
Example #2
class TestMongoRouter(unittest.TestCase):
    def setUp(self):
        self.test_collection = MongoClient()["test_db"]["test_db"]

    def tearDown(self):
        self.test_collection.drop()

    def test_route(self):
        self.test_collection.insert_one({"test_id": "tid", "test": "success"})

        router = MongoRouter()

        self.assertEqual(
            "success",
            router.route("test").find_one({
                "test_id": "tid"
            }).get("test", None))

        router.route("test").insert_one({
            "test_id": "tid_2",
            "test": "success"
        })

        self.assertEqual(
            "success",
            router.route("test").find_one({
                "test_id": "tid_2"
            }).get("test", None))
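MongoRouter itself is not part of this listing; a minimal sketch that would satisfy the assertions above (an assumption, not the project's actual implementation):

from pymongo import MongoClient

class MongoRouter(object):
    def __init__(self, client=None):
        self.client = client or MongoClient()

    def route(self, name):
        # The test expects route("test") to resolve to the
        # "test_db"/"test_db" collection.
        db_name = name + "_db"
        return self.client[db_name][db_name]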
Example #3
class WAMPTicker(ApplicationSession):
    """ WAMP application - subscribes to the 'ticker' push api and saves pushed
    data into a mongodb """
    @inlineCallbacks
    def onJoin(self, details):
        # open/create poloniex database, ticker collection/table
        self.db = MongoClient().poloniex['ticker']
        self.db.drop()
        initTick = Poloniex().returnTicker()
        for market in initTick:
            initTick[market]['_id'] = market
            self.db.insert_one(initTick[market])
        yield self.subscribe(self.onTick, 'ticker')
        print('Subscribed to Ticker')

    def onTick(self, *data):
        self.db.update_one({"_id": data[0]}, {
            "$set": {
                'last': data[1],
                'lowestAsk': data[2],
                'highestBid': data[3],
                'percentChange': data[4],
                'baseVolume': data[5],
                'quoteVolume': data[6],
                'isFrozen': data[7],
                'high24hr': data[8],
                'low24hr': data[9]
            }
        })

    def onDisconnect(self):
        # stop reactor if disconnected
        if reactor.running:
            reactor.stop()
Example #4
class MongoStore(Store):
    def __init__(self, db, collection, url='mongodb://localhost'):
        self.collection = MongoClient(url)[db][collection]

    def fetch(self, oid):
        return self.collection.find_one({'_id':oid})

    def fetch_all(self):
        return self.collection.find()

    def iter_ids(self):
        for obj in self.collection.find({}, {'_id':True}):
            yield obj['_id']

    def save(self, obj):
        self.collection.save(obj)        
   
    def save_many(self, obj_iter):
        self.collection.insert(obj_iter)    

    def flush(self):
        self.collection.drop()

    def delete(self, oid):
        self.collection.delete_one({'_id':oid})
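For illustration, a session with this store might look as follows (database and document names are invented; save() and insert() are legacy pymongo 2.x methods, superseded by insert_one()/insert_many() in later drivers):

store = MongoStore('mydb', 'mycoll')   # defaults to mongodb://localhost
store.save({'_id': 1, 'payload': 'hello'})
print(store.fetch(1))                  # -> {'_id': 1, 'payload': 'hello'}
print(list(store.iter_ids()))          # -> [1]
store.delete(1)
store.flush()                          # drops the entire collection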
Example #5
def parent():
	SLEEP = 10
	p = MongoClient().client["MP"].p
	p.drop()
	p.insert_one({"_id": 1})
	p.insert_one({"_id": 2})
	isParent = True

	newpid1 = os.fork()
	# We are the child
	if newpid1 == 0:
		isParent = False
		child1()
		p = MongoClient().client["MP"].p
		p.remove({"_id": 1})
	# We are the parent
	else:
		newpid2 = os.fork()
		# We are the child
		if newpid2 == 0:
			isParent = False
			child2()
			p = MongoClient().client["MP"].p
			p.remove({"_id": 2})

	if not isParent:
		print "PROCESS FINISHED"
	else:
		wait = True
		while wait:
			ps = p.find({})
			wait = False if ps.count() == 0 else True
			if wait:
				print "MAIN PROCESS WAITING: %i" % ps.count()
				time.sleep(SLEEP)

		print "MAIN PROCESS FINISHED"
Example #6
class EventsTestMixin(object):
    """
    Helpers and setup for running tests that evaluate events emitted
    """
    def setUp(self):
        super(EventsTestMixin, self).setUp()
        self.event_collection = MongoClient()["test"]["events"]
        self.event_collection.drop()
        self.start_time = datetime.now()

    def assert_event_emitted_num_times(self, event_name, event_time, event_user_id, num_times_emitted):
        """
        Tests the number of times a particular event was emitted.
        :param event_name: Expected event name (e.g., "edx.course.enrollment.activated")
        :param event_time: Latest expected time, after which the event would fire (e.g., the beginning of the test case)
        :param event_user_id: user_id expected in the event
        :param num_times_emitted: number of times the event is expected to appear since the event_time
        """
        self.assertEqual(
            self.event_collection.find(
                {
                    "name": event_name,
                    "time": {"$gt": event_time},
                    "event.user_id": int(event_user_id),
                }
            ).count(), num_times_emitted
        )
Example #7
class TestBucketIntegration(unittest.TestCase):
    def setUp(self):
        self.db = database.Database(HOST, PORT, DB_NAME)
        self.bucket = bucket.Bucket(self.db, BUCKET)
        self.mongo_collection = MongoClient(HOST, PORT)[DB_NAME][BUCKET]

    def setup__timestamp_data(self):
        self.mongo_collection.save({
            "_id": 'last',
            "_timestamp": d_tz(2013, 3, 1),
            "_week_start_at": d_tz(2013, 2, 25)
        })
        self.mongo_collection.save({
            "_id": 'first',
            "_timestamp": d_tz(2013, 1, 1),
            "_week_start_at": d_tz(2012, 12, 31)
        })
        self.mongo_collection.save({
            "_id": 'second',
            "_timestamp": d_tz(2013, 2, 1),
            "_week_start_at": d_tz(2013, 1, 28)
        })

    def tearDown(self):
        self.mongo_collection.drop()

    def test_that_records_get_sent_to_mongo_correctly(self):
        my_record = Record({'foo': 'bar'})
        self.bucket.store(my_record)

        collection = self.mongo_collection.find()
        assert_that(list(collection), only_contains(
            has_entries({"foo": "bar"})
        ))

    def test_that_a_list_of_records_get_sent_to_mongo_correctly(self):
        my_records = [
            Record({'name': 'Groucho'}),
            Record({'name': 'Harpo'}),
            Record({'name': 'Chico'})
        ]

        self.bucket.store(my_records)

        collection = self.mongo_collection.find()
        assert_that(list(collection), only_contains(
            has_entries({'name': 'Groucho'}),
            has_entries({'name': 'Harpo'}),
            has_entries({'name': 'Chico'})
        ))

    def test_period_queries_get_sorted_by__week_start_at(self):
        self.setup__timestamp_data()
        query = Query.create(period="week")
        result = query.execute(self.bucket.repository)
        assert_that(result.data(), contains(
            has_entry('_start_at', d_tz(2012, 12, 31)),
            has_entry('_start_at', d_tz(2013, 1, 28)),
            has_entry('_start_at', d_tz(2013, 2, 25))
        ))
Example #8
class TestBucketIntegration(unittest.TestCase):
    def setUp(self):
        self.db = database.Database(HOST, PORT, DB_NAME)
        self.bucket = bucket.Bucket(self.db, BUCKET)
        self.mongo_collection = MongoClient(HOST, PORT)[DB_NAME][BUCKET]

    def setup__timestamp_data(self):
        self.mongo_collection.save({
            "_id": 'last',
            "_timestamp": d_tz(2013, 3, 1),
            "_week_start_at": d_tz(2013, 2, 25)
        })
        self.mongo_collection.save({
            "_id": 'first',
            "_timestamp": d_tz(2013, 1, 1),
            "_week_start_at": d_tz(2012, 12, 31)
        })
        self.mongo_collection.save({
            "_id": 'second',
            "_timestamp": d_tz(2013, 2, 1),
            "_week_start_at": d_tz(2013, 1, 28)
        })

    def tearDown(self):
        self.mongo_collection.drop()

    def test_that_records_get_sent_to_mongo_correctly(self):
        my_record = Record({'foo': 'bar'})
        self.bucket.store(my_record)

        collection = self.mongo_collection.find()
        assert_that(list(collection), only_contains(has_entries({"foo":
                                                                 "bar"})))

    def test_that_a_list_of_records_get_sent_to_mongo_correctly(self):
        my_records = [
            Record({'name': 'Groucho'}),
            Record({'name': 'Harpo'}),
            Record({'name': 'Chico'})
        ]

        self.bucket.store(my_records)

        collection = self.mongo_collection.find()
        assert_that(
            list(collection),
            only_contains(has_entries({'name': 'Groucho'}),
                          has_entries({'name': 'Harpo'}),
                          has_entries({'name': 'Chico'})))

    def test_period_queries_get_sorted_by__week_start_at(self):
        self.setup__timestamp_data()
        query = Query.create(period="week")
        result = query.execute(self.bucket.repository)
        assert_that(
            result.data(),
            contains(has_entry('_start_at', d_tz(2012, 12, 31)),
                     has_entry('_start_at', d_tz(2013, 1, 28)),
                     has_entry('_start_at', d_tz(2013, 2, 25))))
Example #9
class RepositoryIntegrationTest(unittest.TestCase):
    __metaclass__ = ABCMeta

    def setUp(self):
        mongo = MongoDriver(MongoClient(HOST, PORT)[DB_NAME][BUCKET])
        self.repo = Repository(mongo)

        self.mongo_collection = MongoClient(HOST, PORT)[DB_NAME][BUCKET]
        self.mongo_collection.drop()
Example #10
class RepositoryIntegrationTest(unittest.TestCase):
    __metaclass__ = ABCMeta

    def setUp(self):
        mongo = MongoDriver(MongoClient(HOST, PORT)[DB_NAME][BUCKET])
        self.repo = Repository(mongo)

        self.mongo_collection = MongoClient(HOST, PORT)[DB_NAME][BUCKET]
        self.mongo_collection.drop()
Example #11
class PluginXmlManager():
    def __init__(self):
        self.pluginPath = MongoClient('localhost', 27017).beat.Plugin
        self.systemPath = MongoClient('localhost', 27017).beat.System

    def uploadSystemOnSave(self, xml):
        my_dict = xmltodict.parse(xml)
        self.systemPath.insert_one(my_dict)

    def deleteSystem(self):
        self.systemPath.drop()

    def getListOfPlugins(self):
        pluginList = self.pluginPath.find()
        list_of_plugins = []
        for item in pluginList:
            list_of_plugins.append(item['Plugin']['Plugin_name']['#text'])
        return list_of_plugins

    def deleteSelectedPlugin(self, nameofplugin):
        myquery = {"Plugin.Plugin_name.#text": nameofplugin}
        self.pluginPath.delete_one(myquery)

    def getSelectedPlugin(self, plugin_name):
        pluginList = self.pluginPath.find()
        for item in pluginList:
            if item['Plugin']['Plugin_name']['#text'] == plugin_name:
                return item

    def uploadPlugin(self, xml):
        my_dict = xmltodict.parse(xml)
        self.pluginPath.insert_one(my_dict)

    def pluginExists(self, new_plugin_name):
        pluginList = self.pluginPath.find()
        for item in pluginList:
            if item['Plugin']['Plugin_name']['#text'] == new_plugin_name:
                return True
        return False

    def updatePluginDescription(self, old_description, new_description):
        myquery = {"Plugin.Plugin_Desc.#text": old_description}
        new_values = {"$set": {"Plugin.Plugin_Desc.#text": new_description}}
        self.pluginPath.update_one(myquery, new_values)

    # holder element of where to place xml2
    def xmlMerger(self, holder, xml, xml2):
        tree1 = ET.parse(xml)
        tree2 = ET.parse(xml2)
        xml2 = tree2.getroot()
        for element1 in tree1.findall(holder):
            element1.append(xml2)
        return tree1
Example #12
 def setUp(self):
     product = MongoClient()['test'].product
     product.drop()
     data = [
         {'name': 'kami', 'age': 11},
         {'name': 'kamisama', 'age': 12}
     ]
     product.insert(data)
     self.test_target = Db2Csv(product)
Example #13
def main():
    import sys
    from time import time
    t0 = time()

    if len(sys.argv) < (len(CLI_ARGS)+1):
        print "Usage:", sys.argv[0], " ".join(CLI_ARGS), " ".join(OPT_ARGS)
        exit()

    # Run this immediately so that we crash on the spot if we cannot connect to the DB
    from pymongo import MongoClient
    mdb_host = sys.argv[3].strip()
    mdb = MongoClient(host=mdb_host).users.clicks_per_query

    input_path = sys.argv[1].strip()
    log_filepath = sys.argv[2].strip()

    allowed_user_ids = set()
    with univ_open(input_path, 'r') as f:
        for line in f:
            allowed_user_ids.add(int(line.strip()))

    print "Loaded", len(allowed_user_ids), "allowed user ids."
    
    lp = LogProcessor(log_filepath)
    t0 = time()
    lp.process(allowed_user_ids=allowed_user_ids)
    print "Processed in", time() - t0

    print "Dropping previous DB"
    mdb.drop()
    
    print "Dumping everything into MongoDB"
    t0 = time()
    batch_size = 200000
    i = 0  # in case there are fewer than batch_size elements overall
    for i in xrange(batch_size, len(lp.user_clicks_number), batch_size):
        start = i-batch_size
        end = i
        print "Batch", start, end
        # Note: here we only keep the elements of the list that are not None,
        # as the ones that are None were filtered out by allowed_user_ids
        mdb.insert([_ for _ in lp.user_clicks_number[start:end] if _ is not None])
    # the loop above stops at the last full multiple of batch_size; insert the remainder:
    if i < len(lp.user_clicks_number):
        print "Last batch..."
        mdb.insert([_ for _ in lp.user_clicks_number[i:] if _ is not None])
    print "Done in", time()-t0
    print "Terminating script."
Example #14
def upload(source_json,
           source,
           db_name=DB_NAME,
           coll_name=VERBS,
           drop=False,
           indices=(VERB, PARADIGM)):
    target = MongoClient(LOCALHOST, PORT)[db_name][coll_name]
    if drop:
        target.drop()
    print('Initially,', target.count(), 'entries')
    count = counter()
    for line in read_json_lines(source_json):
        next(count)
        line[SOURCE] = source
        target.insert(line)
    add_indices(target, indices)
    print('\nCurrently,', target.count(), 'entries')
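A hypothetical invocation (the file name and source label are placeholders): drop the old collection, re-insert every JSON line tagged with its source, then rebuild the indices.

upload('verbs.jsonl', source='wiktionary', drop=True)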
Example #15
class WAMPTicker(ApplicationSession):
    """ WAMP application - subscribes to the 'ticker' push api and saves pushed
    data into a mongodb """
    @inlineCallbacks
    def onJoin(self, details):
        # open/create poloniex database, ticker collection/table
        self.db = MongoClient().poloniex['ticker']
        self.db.drop()
        self.populateTicker()
        yield self.subscribe(self.onTick, 'ticker')
        logger.info('Subscribed to Ticker')

    def populateTicker(self):
        initTick = self.api.returnTicker()
        for market in initTick:
            initTick[market]['_id'] = market
            self.db.update_one(
                {'_id': market},
                {'$set': initTick[market]},
                upsert=True)
        logger.info('Populated markets database with ticker data')

    def onTick(self, *data):
        logger.debug(data)
        self.db.update_one(
            {"_id": data[0]},
            {"$set": {'last': data[1],
                      'lowestAsk': data[2],
                      'highestBid': data[3],
                      'percentChange': data[4],
                      'baseVolume': data[5],
                      'quoteVolume': data[6],
                      'isFrozen': data[7],
                      'high24hr': data[8],
                      'low24hr': data[9]
                      }},
            upsert=True)

    def onDisconnect(self):
        # stop reactor if disconnected
        if reactor.running:
            reactor.stop()
Example #16
class PrepareCorpus(object):
    """ Prepare and inject a corpus. """
    def __init__(self, **kwargs):
        self.corpus_id = kwargs.pop('corpus_id')
        self.parse = kwargs.pop('parsecls')(**kwargs)
        self.store = MongoClient()['docs'][self.corpus_id]

    def __call__(self):
        log.info("Dropping existing %s document...", self.corpus_id)
        self.store.drop()

        log.info('Loading documents for corpus %s...', self.corpus_id)
        for i, doc in enumerate(self.parse()):
            if i % 250 == 0:
                log.debug('Processed %i documents...', i)
            self.store.insert(doc.json())

        log.info('Import completed for %i documents.', i+1)

    APPS=set()
    @classmethod
    def Register(cls, c):
        cls.APPS.add(c)
        return c
    
    @classmethod
    def add_arguments(cls, p):
        p.add_argument('corpus_id', metavar='CORPUS_ID')
        p.set_defaults(cls=cls)
        
        app_name = 'prepare'
        sp = p.add_subparsers()
        for c in cls.APPS:
            name = c.__name__.lower()
            name = name.replace(app_name,'')
            csp = sp.add_parser(
                name,
                help=c.__doc__.split('\n')[0],
                description=textwrap.dedent(c.__doc__.rstrip()),
                formatter_class=argparse.RawDescriptionHelpFormatter)
            c.add_arguments(csp)
        return p
Example #17
class MongoRepository:
    def __init__(self, index_name):
        url = os.environ.get('MONGO_URI')
        self.db = MongoClient(url).imsearch[index_name]

    def clean(self):
        self.db.drop()

    def insert_one(self, data):
        return self.db.insert_one(data)

    def insert_many(self, data):
        return self.db.insert_many(data)

    def find_one(self, query):
        response = self.db.find_one(query)
        return response

    def find(self, query):
        return list(self.db.find(query))
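Usage sketch, assuming MONGO_URI points at a reachable server; the index name and documents are illustrative:

import os
os.environ.setdefault('MONGO_URI', 'mongodb://localhost:27017')

repo = MongoRepository('features')
repo.insert_one({'sha1': 'abc123', 'vector': [0.1, 0.2]})
print(repo.find({'sha1': 'abc123'}))   # -> list of matching documents
repo.clean()                           # drops the backing collection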
Example #18
def drop(client, db, cell):  # cell
    '''Delete a cell.

    If a cell did not exist, it is dropped nevertheless. Ask your favourite
    philosopher what it means to delete something that did not exist.

    Example:

    \b
    $ zoo drop --db zika --cell survey
    Are you sure you want to drop the db? [y/N]: y
    Dropped cell "survey" from database "zika".

    \b
    $ zoo drop --db zika --cell survey --force  # no confirmation
    Dropped cell "survey" from database "zika".
    '''
    c = MongoClient(client)[db][cell]
    c.drop()
    print('Dropped cell', '"' + cell + '"', 'from database', '"' + db + '".')
Example #19
def main():
    excel = raw_input("Enter excel file to load: ")
    collection = raw_input("Enter collection (if empty staff collection will be populated): ")
    # collection = ['staff', collection][collection == collection]
    collection = collection if collection else 'staff'
    client = MongoClient("localhost", 27017)['hppi'][collection]
    rd = xlrd.open_workbook(excel)
    sheet = rd.sheet_by_index(0)
    header = sheet.row_values(0)  # column headers
    if fields(set(header), collection) == 0:
        if collection != 'staff':
            client.drop()  # drop once up front so the loop below does not wipe earlier rows
        for rownum in range(1, sheet.nrows):
            row = sheet.row_values(rownum)  # row values
            data = {}
            for el in range(len(header)):
                if sheet.cell(rownum, el).ctype == 3:  # i.e. datetype
                    dt = datetime.datetime(*xlrd.xldate_as_tuple(row[el], rd.datemode))  # convert date from excel
                else:
                    dt = row[el]
                data[header[el]] = dt  # pack data in a dictionary data[name] = value
            if collection == 'staff':
                data["access"] = 1  # data access right by default (can edit only themselves)
                data["pubsnum"] = 0  # num of pubs
                data["prnd"] = 0  # for prnd
                data["prnd_data"] = {}
                data["publist"] = {}
                data["graduated_year"] = int(data["graduated_year"])
                if not re.match(r"^\S+@\S+\.\S+$", data["email"]):
                    print "No correct email for someone with the surname", data["surname"], "skipping"
                else:
                    user = client.find_one({"email": data["email"]})
                    if not user:
                        client.insert_one(data)  # filling mongodb's collection named "staff" with data
                    else:
                        print "Staff member already present in DB, entry is updated"
                        client.update_one({'email': data["email"]}, {'$set': data})
            else:
                client.insert_one(data)
    else:
        print "Column names do not correspond to the specification!"
Example #20
class MongoStore(Store):
    def __init__(self, db, collection, url='mongodb://localhost'):
        self.collection = MongoClient(url)[db][collection]

    def fetch(self, oid):
        return self.collection.find_one({'_id': oid})

    def fetch_all(self):
        return self.collection.find()

    def save(self, obj):
        self.collection.save(obj)

    def save_many(self, obj_iter):
        self.collection.insert(obj_iter)

    def flush(self):
        self.collection.drop()

    def delete(self, oid):
        return self.collection.delete_one({'_id': oid})
Example #21
class PrepareCorpus(object):
    """ Prepare and inject a corpus. """
    def __init__(self, **kwargs):
        self.corpus_id = kwargs.pop('corpus_id')
        self.parse = kwargs.pop('parsecls')(**kwargs)
        self.store = MongoClient()['docs'][self.corpus_id]

    def __call__(self):
        log.info("Dropping existing %s document...", self.corpus_id)
        self.store.drop()
        log.info('Loading documents for corpus %s...', self.corpus_id)
        for i, doc in enumerate(self.parse()):
            if i % 100 == 0:
                log.debug('Processed %i documents...', i)
            self.store.insert(doc.json())
        log.info('Done.')

    APPS=set()
    @classmethod
    def Register(cls, c):
        cls.APPS.add(c)
        return c
    
    @classmethod
    def add_arguments(cls, p):
        p.add_argument('corpus_id', metavar='CORPUS_ID')
        p.set_defaults(cls=cls)
        
        app_name = 'prepare'
        sp = p.add_subparsers()
        for c in cls.APPS:
            name = c.__name__.lower()
            name = name.replace(app_name,'')
            csp = sp.add_parser(
                name,
                help=c.__doc__.split('\n')[0],
                description=textwrap.dedent(c.__doc__.rstrip()),
                formatter_class=argparse.RawDescriptionHelpFormatter)
            c.add_arguments(csp)
        return p
Example #22
class TickPolo(poloniex.Poloniex):
    def __init__(self, *args, **kwargs):
        super(TickPolo, self).__init__(*args, **kwargs)
        self.db = MongoClient().poloniex['ticker']
        self.db.drop()
        tick = self.returnTicker()
        self._ids = {market: int(tick[market]['id']) for market in tick}
        for market in tick:
            self.db.update_one(
                {'_id': market},
                {'$set': {item: float(tick[market][item])
                          for item in tick[market]}},
                upsert=True)

    def ticker(self, market=None):
        '''returns ticker data saved from websocket '''
        if not self._t or not self._running:
            self.logger.error('Websocket is not running!')
            return self.returnTicker()
        if market:
            return self.db.find_one({'_id': market})
        return list(self.db.find())

    def on_ticker(self, data):
        data = [float(dat) for dat in data]
        self.db.update_one(
            {"id": int(data[0])},
            {"$set": {
                'last': data[1],
                'lowestAsk': data[2],
                'highestBid': data[3],
                'percentChange': data[4],
                'baseVolume': data[5],
                'quoteVolume': data[6],
                'isFrozen': int(data[7]),
                'high24hr': data[8],
                'low24hr': data[9]
            }},
            upsert=True)
Example #23
class MongoStore(Store):
    def __init__(self, db, collection, uri='mongodb://localhost'):
        self.collection = MongoClient(uri)[db][collection]

    @classmethod
    def get_protocol(cls):
        return 'mongodb'

    def iter_ids(self):
        for obj in self.collection.find({}, {'_id':True}):
            yield obj['_id']

    def flush(self):
        self.collection.drop()

    def exists(self, oid):
        return self.collection.find({'_id':oid}).count() == 1

    def delete(self, oid):
        self.collection.delete_one({'_id':oid})

    def fetch(self, oid):
        return self.collection.find_one({'_id':oid})

    def fetch_all(self):
        return self.collection.find()

    def save(self, obj):
        self.collection.save(obj)        
   
    def save_many(self, obj_iter):
        self.collection.insert(obj_iter)

    @classmethod
    def Get(cls, store_id, uri='mongodb://localhost', **kwargs):
        db, collection = store_id.split(':')
        return cls(db, collection, uri)
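The Get factory splits a 'db:collection' store id, so the two calls below are equivalent (identifiers are illustrative):

store = MongoStore.Get('analytics:documents')
store = MongoStore('analytics', 'documents', 'mongodb://localhost')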
Example #24
def parent():
    SLEEP = 10
    p = MongoClient().client["MP"].p
    p.drop()
    p.insert_one({"_id": 1})
    p.insert_one({"_id": 2})
    isParent = True

    newpid1 = os.fork()
    # We are the child
    if newpid1 == 0:
        isParent = False
        child1()
        p = MongoClient().client["MP"].p
        p.remove({"_id": 1})
    # We are the parent
    else:
        newpid2 = os.fork()
        # We are the child
        if newpid2 == 0:
            isParent = False
            child2()
            p = MongoClient().client["MP"].p
            p.remove({"_id": 2})

    if not isParent:
        print "PROCESS FINISHED"
    else:
        wait = True
        while wait:
            ps = p.find({})
            wait = False if ps.count() == 0 else True
            if wait:
                print "MAIN PROCESS WAITING: %i" % ps.count()
                time.sleep(SLEEP)

        print "MAIN PROCESS FINISHED"
Example #25
            'consumer_key': 'pOC3SwyaleFnF7HNiQ4VofXDz',
            'consumer_secret': '9wnavUJlt1wjiPJxhtgElNO5CEsMxKS8M7GgsH7005yJuzsvu3',
        },
        'topics': ['mongodb', 'nsa', 'google']
    }


    t = Tweets(account_details)
    t.connect()
    """
    gevent.spawn(tweets, t).join()
    gevent.joinall([
        gevent.spawn(user_stream, t.queue, t.api),
        gevent.spawn(topic_stream, t.queue, t.api, 'twitter')
    ])
    """
    for tw in t.timeline():
        #doc = json.loads(tw, default=json_util.default)
        col.insert(tw)
    """     
    for tw in t.user_tweets():
        json.dumps(tw, indent=4 * ' ')
    """


col = MongoClient()['firstapp2']['messages']
col.drop()

mail(col)
#tweets(col)
Example #26
class TestBucketIntegration(unittest.TestCase):

    def setUp(self):
        self.db = database.Database(HOST, PORT, DB_NAME)
        self.bucket = bucket.Bucket(
            self.db, BucketConfig(BUCKET, data_group="group", data_type="type", max_age_expected=1000))
        self.mongo_collection = MongoClient(HOST, PORT)[DB_NAME][BUCKET]

    def setup__timestamp_data(self):
        self.mongo_collection.save({
            "_id": 'last',
            "_timestamp": d_tz(2013, 3, 1),
            "_week_start_at": d_tz(2013, 2, 25),
            "_updated_at": d_tz(2013, 8, 10)
        })
        self.mongo_collection.save({
            "_id": 'first',
            "_timestamp": d_tz(2013, 1, 1),
            "_week_start_at": d_tz(2012, 12, 31),
            "_updated_at": d_tz(2013, 9, 10)
        })
        self.mongo_collection.save({
            "_id": 'second',
            "_timestamp": d_tz(2013, 2, 1),
            "_week_start_at": d_tz(2013, 1, 28),
            "_updated_at": d_tz(2013, 10, 10)
        })

    def tearDown(self):
        self.mongo_collection.drop()

    def test_that_records_get_sent_to_mongo_correctly(self):
        my_record = Record({'foo': 'bar'})
        self.bucket.store(my_record)

        collection = self.mongo_collection.find()
        assert_that(list(collection), only_contains(
            has_entries({"foo": "bar"})
        ))

    def test_that_a_list_of_records_get_sent_to_mongo_correctly(self):
        my_records = [
            Record({'name': 'Groucho'}),
            Record({'name': 'Harpo'}),
            Record({'name': 'Chico'})
        ]

        self.bucket.store(my_records)

        collection = self.mongo_collection.find()
        assert_that(list(collection), only_contains(
            has_entries({'name': 'Groucho'}),
            has_entries({'name': 'Harpo'}),
            has_entries({'name': 'Chico'})
        ))

    def test_period_queries_get_sorted_by__week_start_at(self):
        self.setup__timestamp_data()
        query = Query.create(period=WEEK)
        result = query.execute(self.bucket.repository)
        assert_that(result.data(), contains(
            has_entry('_start_at', d_tz(2012, 12, 31)),
            has_entry('_start_at', d_tz(2013, 1, 28)),
            has_entry('_start_at', d_tz(2013, 2, 25))
        ))

    def test_bucket_returns_last_updated(self):
        self.setup__timestamp_data()
        assert_that(self.bucket.get_last_updated(),
                    equal_to(d_tz(2013, 10, 10)))

    def test_bucket_returns_none_if_there_is_no_last_updated(self):
        assert_that(self.bucket.get_last_updated(), is_(None))

    def test_bucket_is_recent_enough(self):
        self.mongo_collection.save({
            "_id": "first",
            "_updated_at": datetime.datetime.now() - datetime.timedelta(seconds=500)
        })
        assert_that(self.bucket.is_recent_enough())

    def test_bucket_is_not_recent_enough(self):
        self.mongo_collection.save({
            "_id": "first",
            "_updated_at": datetime.datetime.now() - datetime.timedelta(seconds=50000)
        })
        assert_that(not self.bucket.is_recent_enough())
Example #27
class wsTicker(object):
    def __init__(self, api=None):
        self.api = api
        if not self.api:
            self.api = Poloniex(jsonNums=float)
        self.db = MongoClient().poloniex['ticker']
        self.db.drop()
        self.ws = websocket.WebSocketApp("wss://api2.poloniex.com/",
                                         on_message=self.on_message,
                                         on_error=self.on_error,
                                         on_close=self.on_close)
        self.ws.on_open = self.on_open

    def __call__(self, market=None):
        """ returns ticker from mongodb """
        if market:
            return self.db.find_one({'_id': market})
        return list(self.db.find())

    def on_message(self, ws, message):
        message = json.loads(message)
        if 'error' in message:
            print(message['error'])
            return

        if message[0] == 1002:
            if message[1] == 1:
                print('Subscribed to ticker')
                return

            if message[1] == 0:
                print('Unsubscribed to ticker')
                return

            data = message[2]

            self.db.update_one(
                {"id": float(data[0])},
                {"$set": {
                    'last': data[1],
                    'lowestAsk': data[2],
                    'highestBid': data[3],
                    'percentChange': data[4],
                    'baseVolume': data[5],
                    'quoteVolume': data[6],
                    'isFrozen': data[7],
                    'high24hr': data[8],
                    'low24hr': data[9]
                }},
                upsert=True)

    def on_error(self, ws, error):
        print(error)

    def on_close(self, ws):
        print("Websocket closed!")

    def on_open(self, ws):
        tick = self.api.returnTicker()
        for market in tick:
            self.db.update_one({'_id': market}, {'$set': tick[market]},
                               upsert=True)
        print('Populated markets database with ticker data')
        self.ws.send(json.dumps({'command': 'subscribe', 'channel': 1002}))

    def start(self):
        self.t = Thread(target=self.ws.run_forever)
        self.t.daemon = True
        self.t.start()
        print('Thread started')

    def stop(self):
        self.ws.close()
        self.t.join()
        print('Thread joined')
Example #28
class MongodbUtil(object):
    """
    - MYSQL_PASSWD must be set in .bashrc or .bash_profile.
    """

    def __init__(self, mongo_url, db_name, collection_name, auto_connect=False):
        """
        :param mongo_url: host, port, username, password, auth db
        :param db_name: database name
        :param collection_name: collection name
        :param auto_connect: default do not connect for multiprocessing (http://api.mongodb.com/python/current/faq.html#using-pymongo-with-multiprocessing)
        """
        self.mongo_url = mongo_url
        self.db_name = db_name
        self.collection_name = collection_name
        self.auto_connect = auto_connect
        self.collection = MongoClient(mongo_url, socketKeepAlive=True, connect=auto_connect)[db_name][collection_name]

    def __repr__(self):
        return '%s (db_name:%s, collection_name:%s, auto_connect:%s)' % (
            StringUtil.mask_passwd_in_url(self.mongo_url), self.db_name, self.collection_name, self.auto_connect)

    def __str__(self):
        return self.__repr__()

    def find(self, query=None, sort=None, limit=0):
        if query is None:
            query = {}
        if sort is None:
            sort = [('_id', ASCENDING)]

        for row in self.collection.find(query, no_cursor_timeout=True).sort(sort).limit(limit):
            yield row

    def count(self, query=None):
        if query is None:
            query = {}
        return self.collection.count(query)

    def find_one(self, query: dict) -> dict:
        return self.collection.find_one(query)

    def create_index(self, field_list=None, unique=False):
        if field_list is None:
            field_list = []
        for field in field_list:
            self.collection.create_index([(field, ASCENDING)], background=True, unique=unique)
        return

    def insert(self, row: dict):
        return self.collection.insert_one(row)

    def update_one(self, where_query: dict, update_content: dict, upsert=False):
        return self.collection.update_one(
            where_query,
            update_content,
            upsert=upsert
        )

    def update(self, where_query: dict, update_content: dict, upsert=False):
        return self.collection.update_many(
            where_query,
            update_content,
            upsert=upsert
        )

    def save(self, row):
        return self.collection.save(row)

    def delete(self, where_query: dict):
        result = self.collection.delete_one(where_query)
        if result:
            return result.deleted_count
        return 0

    def drop(self):
        return self.collection.drop()
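A usage sketch (URL, database, and field names are placeholders):

util = MongodbUtil('mongodb://localhost:27017', 'crawl', 'pages')
util.create_index(['url'], unique=True)
util.insert({'url': 'http://example.com', 'status': 200})
for row in util.find({'status': 200}, limit=10):   # find() yields rows sorted by _id
    print(row)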
Example #29
class PrepareCorpus(object):
    """ Prepare and inject a corpus. """
    def __init__(self, **kwargs):
        self.corpus_id = kwargs.pop('corpus_id')
        self.parse = kwargs.pop('parsecls')(**kwargs)
        self.store = MongoClient()['docs'][self.corpus_id]

    def __call__(self):
        log.info("Dropping existing %s document...", self.corpus_id)
        self.store.drop()

        # mention statistics
        total_non_nil = 0
        total_candidate_recalled = 0
        chain_mention_counts = []
        chain_candidate_counts = []

        log.info('Loading documents for corpus %s...', self.corpus_id)
        for i, doc in enumerate(self.parse()):
            if i % 100 == 0:
                log.debug('Processed %i documents...', i)

            # accumulate mention statistics
            for chain in doc.chains:
                chain_mention_counts.append(len(chain.mentions))
                chain_candidate_counts.append(len(chain.candidates))
                if chain.resolution is not None:
                    total_non_nil += len(chain.mentions)
                    if chain.resolution.id in [c.id for c in chain.candidates]:
                        total_candidate_recalled += len(chain.mentions)
                    else:
                        log.warn('No candidate for chain: %s - %s', chain.resolution.id, ', '.join(set("'" + m.text.lower() + "'" for m in chain.mentions)))

            self.store.insert(doc.json())

        total_chains = len(chain_mention_counts)
        total_mentions = sum(chain_mention_counts)

        section_delimiter = '-' * 40
        log.info(section_delimiter)
        log.info('CORPUS STATISTICS')
        log.info(section_delimiter)
        log.info('Total mentions           = %i', total_mentions)
        log.info('Total nil mentions (%%)   = %i (%.2f)', total_mentions - total_non_nil, float(total_mentions - total_non_nil) / total_mentions)

        log.info(section_delimiter)
        log.info('Total chains             = %i', total_chains)
        log.info('Mentions per Chain (σ)   = %.1f (%.2f)', numpy.mean(chain_mention_counts), numpy.std(chain_mention_counts))

        log.info(section_delimiter)
        log.info('Candidates per Chain (σ) = %.1f (%.2f)', numpy.mean(chain_candidate_counts), numpy.std(chain_candidate_counts))

        no_candidates_count = sum(1 for c in chain_candidate_counts if c == 0)
        candidate_recall = 'n/a' if total_non_nil == 0 else '%.2f' % (float(total_candidate_recalled) / total_non_nil)
        log.info('Candidate Recall (%%)     = %s', candidate_recall)
        log.info('Nil Candidate Chains (%%) = %i (%.2f)', no_candidates_count, float(no_candidates_count) / total_chains)
        log.info(section_delimiter)

        log.info('Import completed for %i documents.', i+1)

    APPS=set()
    @classmethod
    def Register(cls, c):
        cls.APPS.add(c)
        return c
    
    @classmethod
    def add_arguments(cls, p):
        p.add_argument('corpus_id', metavar='CORPUS_ID')
        p.set_defaults(cls=cls)
        
        app_name = 'prepare'
        sp = p.add_subparsers()
        for c in cls.APPS:
            name = c.__name__.lower()
            name = name.replace(app_name,'')
            csp = sp.add_parser(
                name,
                help=c.__doc__.split('\n')[0],
                description=textwrap.dedent(c.__doc__.rstrip()),
                formatter_class=argparse.RawDescriptionHelpFormatter)
            c.add_arguments(csp)
        return p
Example #30
def clean_collection():
    uri = os.environ.get("MONGO_HOST", "mongodb://localhost/test")
    collection = MongoClient(uri).test.test
    collection.drop()
    return collection
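A test can call this helper to start from a guaranteed-empty collection; the assertion below is illustrative:

def test_insert_roundtrip():
    collection = clean_collection()
    collection.insert_one({'_id': 1, 'ok': True})
    assert collection.find_one({'_id': 1})['ok'] is True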
Example #31
from pymongo import MongoClient
from textblob import TextBlob
from newspaper import Article
from newspaper import ArticleException

dataForEntityAnalysis = MongoClient().precogTask.dataForAnalysis.find()
tweetCollection = MongoClient().precogTask.tweetCollection

polarityCollection = MongoClient().precogTask.polarityCollection
polarityCollection.drop()
for ne in dataForEntityAnalysis:
    #Finding avg tweet polarity
    wordData = {}
    wordData['word'] = ne['word']

    tweetPolarityList = []
    totalTweetPolarity = 0
    print("analysing word " + ne['word'])
    for t in ne['tweetList']:
        print("analysing tweet " + str(t))
        tweet = tweetCollection.find_one({'_id' : t})
        tempPol = TextBlob(tweet['text']).sentiment.polarity
        tweetPolarityList.append(tempPol)
        totalTweetPolarity += tempPol
    avgTweetPolarity = totalTweetPolarity/len(ne['tweetList'])
    wordData['tweetPolarityList'] = tweetPolarityList

    #finding average article polarity
    totalArticlePolarity = 0
    articlePolarityList = []
    for articleURL in ne['articleLinksList']:
Example #32
class TestMongoDriver(unittest.TestCase):
    def setUp(self):
        self.mongo_driver = MongoDriver(
            MongoClient(HOST, PORT)[DB_NAME][BUCKET])

        self.mongo_collection = MongoClient(HOST, PORT)[DB_NAME][BUCKET]
        self.mongo_collection.drop()

    def test_save(self):
        thing_to_save = {'name': 'test_document'}
        another_thing_to_save = {'name': '2nd_test_document'}

        self.mongo_driver.save(thing_to_save)
        self.mongo_driver.save(another_thing_to_save)

        results = self.mongo_collection.find()
        assert_that(results, has_item(thing_to_save))
        assert_that(results, has_item(another_thing_to_save))

    def test_save_updates_document_with_id(self):
        a_document = {"_id": "event1", "title": "I'm an event"}
        updated_document = {"_id": "event1", "title": "I'm another event"}

        self.mongo_driver.save(a_document)
        self.mongo_driver.save(updated_document)

        saved_documents = self.mongo_collection.find()

        assert_that(saved_documents, only_contains(updated_document))

    def test_find(self):
        self._setup_people()

        results = self.mongo_driver.find(query={"plays": "guitar"},
                                         sort=["name", "ascending"],
                                         limit=None)

        assert_that(
            results,
            contains(
                has_entries({
                    "name": "George",
                    "plays": "guitar"
                }),
                has_entries({
                    "name": "John",
                    "plays": "guitar"
                }),
            ))

    def test_find_sort_descending(self):
        self._setup_people()

        results = self.mongo_driver.find(query={"plays": "guitar"},
                                         sort=["name", "descending"],
                                         limit=None)

        assert_that(
            results,
            contains(
                has_entries({
                    "name": "John",
                    "plays": "guitar"
                }),
                has_entries({
                    "name": "George",
                    "plays": "guitar"
                }),
            ))

    def test_find_with_limit(self):
        self._setup_people()

        results = self.mongo_driver.find(query={"plays": {"$ne": "guitar"}},
                                         sort=["name", "descending"],
                                         limit=1)

        assert_that(results,
                    contains(has_entries({
                        "name": "Ringo",
                        "plays": "drums"
                    })))

    def test_group(self):
        self._setup_musical_instruments()

        results = self.mongo_driver.group(keys=["type"],
                                          query={},
                                          collect_fields=[])

        assert_that(
            results,
            contains_inanyorder(
                has_entries({
                    "_count": is_(2),
                    "type": "wind"
                }), has_entries({
                    "_count": is_(3),
                    "type": "string"
                })))

    def test_group_with_query(self):
        self._setup_musical_instruments()

        results = self.mongo_driver.group(keys=["type"],
                                          query={"range": "high"},
                                          collect_fields=[])

        assert_that(
            results,
            contains_inanyorder(
                has_entries({
                    "_count": is_(1),
                    "type": "wind"
                }), has_entries({
                    "_count": is_(2),
                    "type": "string"
                })))

    def test_group_and_collect_additional_properties(self):
        self._setup_musical_instruments()

        results = self.mongo_driver.group(keys=["type"],
                                          query={},
                                          collect_fields=["range"])

        assert_that(
            results,
            contains(
                has_entries({
                    "_count": is_(2),
                    "type": "wind",
                    "range": ["high", "low"]
                }),
                has_entries({
                    "_count": is_(3),
                    "type": "string",
                    "range": ["high", "high", "low"]
                })))

    def test_group_and_collect_with_hyphen_in_field_name(self):
        self.mongo_collection.save({"type": "foo", "this-name": "bar"})
        self.mongo_collection.save({"type": "foo", "this-name": "bar"})
        self.mongo_collection.save({"type": "bar", "this-name": "bar"})
        self.mongo_collection.save({"type": "bar", "this-name": "foo"})

        results = self.mongo_driver.group(keys=["type"],
                                          query={},
                                          collect_fields=["this-name"])

        assert_that(
            results,
            contains(
                has_entries({
                    "_count": is_(2),
                    "type": "foo",
                    "this-name": ["bar", "bar"]
                }),
                has_entries({
                    "_count": is_(2),
                    "type": "bar",
                    "this-name": ["bar", "foo"]
                })))

    def test_group_and_collect_with_injection_attempt(self):
        self.mongo_collection.save({"type": "foo", "this-name": "bar"})
        self.mongo_collection.save({"type": "foo", "this-name": "bar"})
        self.mongo_collection.save({"type": "bar", "this-name": "bar"})
        self.mongo_collection.save({"type": "bar", "this-name": "foo"})

        for collect_field in ["name']-foo", "name\\']-foo"]:
            results = self.mongo_driver.group(keys=["type"],
                                              query={},
                                              collect_fields=[collect_field])

            assert_that(
                results,
                contains(has_entries({
                    "_count": is_(2),
                    "type": "foo"
                }), has_entries({
                    "_count": is_(2),
                    "type": "bar"
                })))

    def test_group_and_collect_with_false_value(self):
        self.mongo_collection.save({"foo": "one", "bar": False})
        self.mongo_collection.save({"foo": "two", "bar": True})
        self.mongo_collection.save({"foo": "two", "bar": True})
        self.mongo_collection.save({"foo": "one", "bar": False})

        results = self.mongo_driver.group(["foo"], {}, ["bar"])

        assert_that(
            results,
            contains(has_entries({"bar": [False, False]}),
                     has_entries({"bar": [True, True]})))

    def test_group_without_keys(self):
        self._setup_people()

        results = self.mongo_driver.group(keys=[], query={}, collect_fields=[])

        assert_that(results, contains(has_entries({"_count": is_(4)}), ))

    # this responsibility does not belong here
    def test_group_ignores_documents_without_grouping_keys(self):
        self._setup_people()
        self.mongo_collection.save({"name": "Yoko"})

        results = self.mongo_driver.group(keys=["plays"],
                                          query={},
                                          collect_fields=[])

        assert_that(
            results,
            contains(
                has_entries({
                    "_count": is_(2),
                    "plays": "guitar"
                }),
                has_entries({
                    "_count": is_(1),
                    "plays": "bass"
                }),
                has_entries({
                    "_count": is_(1),
                    "plays": "drums"
                }),
            ))

    def _setup_people(self):
        self.mongo_collection.save({"name": "George", "plays": "guitar"})
        self.mongo_collection.save({"name": "John", "plays": "guitar"})
        self.mongo_collection.save({"name": "Paul", "plays": "bass"})
        self.mongo_collection.save({"name": "Ringo", "plays": "drums"})

    def _setup_musical_instruments(self):
        self.mongo_collection.save({
            "instrument": "flute",
            "type": "wind",
            "range": "high"
        })
        self.mongo_collection.save({
            "instrument": "contrabassoon",
            "type": "wind",
            "range": "low"
        })
        self.mongo_collection.save({
            "instrument": "violin",
            "type": "string",
            "range": "high"
        })
        self.mongo_collection.save({
            "instrument": "viola",
            "type": "string",
            "range": "high"
        })
        self.mongo_collection.save({
            "instrument": "cello",
            "type": "string",
            "range": "low"
        })
Example #33
class wsTicker(object):

    def __init__(self, api=None):
        self.api = api
        if not self.api:
            self.api = Poloniex(jsonNums=float)
        self.db = MongoClient().poloniex['ticker']
        self.db.drop()
        self.ws = websocket.WebSocketApp("wss://api2.poloniex.com/",
                                         on_message=self.on_message,
                                         on_error=self.on_error,
                                         on_close=self.on_close)
        self.ws.on_open = self.on_open

    def __call__(self, market=None):
        """ returns ticker from mongodb """
        if market:
            return self.db.find_one({'_id': market})
        return list(self.db.find())

    def on_message(self, ws, message):
        message = json.loads(message)
        if 'error' in message:
            print(message['error'])
            return

        if message[0] == 1002:
            if message[1] == 1:
                print('Subscribed to ticker')
                return

            if message[1] == 0:
                print('Unsubscribed to ticker')
                return

            data = message[2]

            self.db.update_one(
                {"id": float(data[0])},
                {"$set": {'last': data[1],
                          'lowestAsk': data[2],
                          'highestBid': data[3],
                          'percentChange': data[4],
                          'baseVolume': data[5],
                          'quoteVolume': data[6],
                          'isFrozen': data[7],
                          'high24hr': data[8],
                          'low24hr': data[9]
                          }},
                upsert=True)

    def on_error(self, ws, error):
        print(error)

    def on_close(self, ws):
        print("Websocket closed!")

    def on_open(self, ws):
        tick = self.api.returnTicker()
        for market in tick:
            self.db.update_one(
                {'_id': market},
                {'$set': tick[market]},
                upsert=True)
        print('Populated markets database with ticker data')
        self.ws.send(json.dumps({'command': 'subscribe',
                                 'channel': 1002}))

    def start(self):
        self.t = Thread(target=self.ws.run_forever)
        self.t.daemon = True
        self.t.start()
        print('Thread started')

    def stop(self):
        self.ws.close()
        self.t.join()
        print('Thread joined')
Example #34
class TestDataSetIntegration(unittest.TestCase):
    def setUp(self):
        self.storage = MongoStorageEngine.create(HOSTS, PORT, DB_NAME)

        self.config = {
            'name': DATA_SET,
            'data_group': "group",
            'data_type': "type",
            'max_age_expected': 1000,
        }

        self.data_set = DataSet(self.storage, self.config)

        self.mongo_collection = MongoClient(HOSTS, PORT)[DB_NAME][DATA_SET]

    def setup__timestamp_data(self):
        self.mongo_collection.save({
            "_id": 'last',
            "_timestamp": d_tz(2013, 3, 1),
            "_week_start_at": d_tz(2013, 2, 25),
            "_updated_at": d_tz(2013, 8, 10)
        })
        self.mongo_collection.save({
            "_id": 'first',
            "_timestamp": d_tz(2013, 1, 1),
            "_week_start_at": d_tz(2012, 12, 31),
            "_updated_at": d_tz(2013, 9, 10)
        })
        self.mongo_collection.save({
            "_id": 'second',
            "_timestamp": d_tz(2013, 2, 1),
            "_week_start_at": d_tz(2013, 1, 28),
            "_updated_at": d_tz(2013, 10, 10)
        })

    def tearDown(self):
        self.mongo_collection.drop()

    def test_period_queries_get_sorted_by__week_start_at(self):
        self.setup__timestamp_data()
        query = Query.create(period=WEEK)
        result = self.data_set.execute_query(query)
        assert_that(
            result,
            contains(has_entry('_start_at', d_tz(2012, 12, 31)),
                     has_entry('_start_at', d_tz(2013, 1, 28)),
                     has_entry('_start_at', d_tz(2013, 2, 25))))

    def test_data_set_is_recent_enough(self):
        self.mongo_collection.save({
            "_id": "first",
            "_updated_at": datetime.datetime.now() - datetime.timedelta(seconds=500)
        })
        assert_that(self.data_set.is_recent_enough())

    def test_data_set_is_not_recent_enough(self):
        self.mongo_collection.save({
            "_id": "first",
            "_updated_at": datetime.datetime.now() - datetime.timedelta(seconds=50000)
        })
        assert_that(not self.data_set.is_recent_enough())
Example #35
class TestMongoDriver(unittest.TestCase):
    def setUp(self):
        self.mongo_driver = MongoDriver(MongoClient(HOST, PORT)[DB_NAME][BUCKET])

        self.mongo_collection = MongoClient(HOST, PORT)[DB_NAME][BUCKET]
        self.mongo_collection.drop()

    def test_save(self):
        thing_to_save = {'name': 'test_document'}
        another_thing_to_save = {'name': '2nd_test_document'}

        self.mongo_driver.save(thing_to_save)
        self.mongo_driver.save(another_thing_to_save)

        results = self.mongo_collection.find()
        assert_that(results, has_item(thing_to_save))
        assert_that(results, has_item(another_thing_to_save))

    def test_save_updates_document_with_id(self):
        a_document = {"_id": "event1", "title": "I'm an event"}
        updated_document = {"_id": "event1", "title": "I'm another event"}

        self.mongo_driver.save(a_document)
        self.mongo_driver.save(updated_document)

        saved_documents = self.mongo_collection.find()

        assert_that(saved_documents, only_contains(updated_document))

    def test_find_one(self):
        self._setup_people()

        result = self.mongo_driver.find_one(query={"name": "George"})

        assert_that(result, has_entries({
            "name": "George", "plays": "guitar"
        }))

    def test_find(self):
        self._setup_people()

        results = self.mongo_driver.find(query={"plays": "guitar"},
                                         sort=["name", "ascending"],
                                         limit=None)

        assert_that(results, contains(
            has_entries({"name": "George", "plays": "guitar"}),
            has_entries({"name": "John", "plays": "guitar"}),
        ))

    def test_find_sort_descending(self):
        self._setup_people()

        results = self.mongo_driver.find(query={"plays": "guitar"},
                                         sort=["name", "descending"],
                                         limit=None)

        assert_that(results, contains(
            has_entries({"name": "John", "plays": "guitar"}),
            has_entries({"name": "George", "plays": "guitar"}),
        ))

    def test_find_with_limit(self):
        self._setup_people()

        results = self.mongo_driver.find(query={"plays": {"$ne": "guitar"}},
                                         sort=["name", "descending"],
                                         limit=1)

        assert_that(results, contains(
            has_entries({"name": "Ringo", "plays": "drums"})
        ))

    def test_group(self):
        self._setup_musical_instruments()

        results = self.mongo_driver.group(keys=["type"], query={}, collect_fields=[])

        assert_that(results, contains_inanyorder(
            has_entries({"_count": is_(2), "type": "wind"}),
            has_entries({"_count": is_(3), "type": "string"})
        ))

    def test_group_with_query(self):
        self._setup_musical_instruments()

        results = self.mongo_driver.group(keys=["type"],
                                          query={"range": "high"},
                                          collect_fields=[])

        assert_that(results, contains_inanyorder(
            has_entries({"_count": is_(1), "type": "wind"}),
            has_entries({"_count": is_(2), "type": "string"})
        ))

    def test_group_and_collect_additional_properties(self):
        self._setup_musical_instruments()

        results = self.mongo_driver.group(keys=["type"], query={}, collect_fields=["range"])

        assert_that(results, contains(
            has_entries(
                {"_count": is_(2),
                 "type": "wind",
                 "range": ["high", "low"]}),
            has_entries(
                {"_count": is_(3),
                 "type": "string",
                 "range": ["high", "high", "low"]})
        ))

    def test_group_and_collect_with_hyphen_in_field_name(self):
        self.mongo_collection.save({"type": "foo", "this-name": "bar"})
        self.mongo_collection.save({"type": "foo", "this-name": "bar"})
        self.mongo_collection.save({"type": "bar", "this-name": "bar"})
        self.mongo_collection.save({"type": "bar", "this-name": "foo"})

        results = self.mongo_driver.group(keys=["type"], query={}, collect_fields=["this-name"])

        assert_that(results, contains(
            has_entries(
                {"_count": is_(2),
                 "type": "foo",
                 "this-name": ["bar", "bar"]}),
            has_entries(
                {"_count": is_(2),
                 "type": "bar",
                 "this-name": ["bar", "foo"]})
        ))

    def test_group_and_collect_with_injection_attempt(self):
        self.mongo_collection.save({"type": "foo", "this-name": "bar"})
        self.mongo_collection.save({"type": "foo", "this-name": "bar"})
        self.mongo_collection.save({"type": "bar", "this-name": "bar"})
        self.mongo_collection.save({"type": "bar", "this-name": "foo"})

        for collect_field in ["name']-foo", "name\\']-foo"]:
            results = self.mongo_driver.group(keys=["type"], query={}, collect_fields=[collect_field])

            assert_that(results, contains(
                has_entries(
                    {"_count": is_(2),
                     "type": "foo"}),
                has_entries(
                    {"_count": is_(2),
                     "type": "bar"})
            ))

    def test_group_and_collect_with_false_value(self):
        self.mongo_collection.save({"foo": "one", "bar": False})
        self.mongo_collection.save({"foo": "two", "bar": True})
        self.mongo_collection.save({"foo": "two", "bar": True})
        self.mongo_collection.save({"foo": "one", "bar": False})

        results = self.mongo_driver.group(["foo"], {}, ["bar"])

        assert_that(results, contains(
            has_entries({
                "bar": [False, False]
            }),
            has_entries({
                "bar": [True, True]
            })
        ))

    def test_group_without_keys(self):
        self._setup_people()

        results = self.mongo_driver.group(keys=[], query={}, collect_fields=[])

        assert_that(results, contains(
            has_entries({"_count": is_(4)}),
        ))

    # this responsibility does not belong here
    def test_group_ignores_documents_without_grouping_keys(self):
        self._setup_people()
        self.mongo_collection.save({"name": "Yoko"})

        results = self.mongo_driver.group(keys=["plays"], query={}, collect_fields=[])

        assert_that(results, contains(
            has_entries({"_count": is_(2), "plays": "guitar"}),
            has_entries({"_count": is_(1), "plays": "bass"}),
            has_entries({"_count": is_(1), "plays": "drums"}),
        ))

    def _setup_people(self):
        self.mongo_collection.save({"name": "George", "plays": "guitar"})
        self.mongo_collection.save({"name": "John", "plays": "guitar"})
        self.mongo_collection.save({"name": "Paul", "plays": "bass"})
        self.mongo_collection.save({"name": "Ringo", "plays": "drums"})

    def _setup_musical_instruments(self):
        self.mongo_collection.save(
            {"instrument": "flute", "type": "wind", "range": "high"})
        self.mongo_collection.save(
            {"instrument": "contrabassoon", "type": "wind", "range": "low"})
        self.mongo_collection.save(
            {"instrument": "violin", "type": "string", "range": "high"})
        self.mongo_collection.save(
            {"instrument": "viola", "type": "string", "range": "high"})
        self.mongo_collection.save(
            {"instrument": "cello", "type": "string", "range": "low"})
Ejemplo n.º 36
0
import json
from pymongo import MongoClient

with open("Source_Material/poems_etc/poems_etc_parsed/the_sonnets.json",
          "r") as s:
    sonnets = json.load(s)
s.close()

client = MongoClient()
sonnet_collection = MongoClient().dustball_db.sonnets
sonnet_collection.drop()


def add_author(s):
    s["author_first_name"] = "william"
    s["author_last_name"] = "shakespeare"
    return s


prepped_sonnets = list(map(add_author, sonnets))
sonnet_collection.insert_many(prepped_sonnets)
sonnet_collection.create_index([("text", "text")])
client.close()
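For reference, a minimal sketch of querying the text index built above; this would need to run before client.close(), and the search term is only an example:

# Return the best text-score matches first for sonnets mentioning "summer".
cursor = sonnet_collection.find(
    {"$text": {"$search": "summer"}},
    {"text": 1, "score": {"$meta": "textScore"}},
).sort([("score", {"$meta": "textScore"})])
for sonnet in cursor:
    print(sonnet["score"], sonnet.get("text"))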
Ejemplo n.º 37
0
class TickerGenerator(object):
    def __init__(self, slack_info, mongo_ip):
        self.api = Poloniex()

        self.db = MongoClient(mongo_ip).poloniex['ticker']

        self.db.drop()

        self.ws = websocket.WebSocketApp("wss://api2.poloniex.com/",
                                         on_message=self.on_message,
                                         on_error=self.on_error,
                                         on_close=self.on_close)

        self.ws.on_open = self.on_open

        self.slack_client = slack_info['client']

        self.slack_channel_id_alerts = slack_info['channels']['alerts'][1]
        self.slack_channel_id_exceptions = slack_info['channels'][
            'exceptions'][1]

        self.last_update = None

    def __call__(self, market=None):
        if market:
            return self.db.find_one({'_id': market})

        return list(self.db.find())

    def on_message(self, ws, message):
        message = json.loads(message)

        #print(message)

        if 'error' in message:
            #print(message['error'])
            logger.error(message['error'])

            # SEND SLACK EXCEPTION MESSAGE HERE

            return

        if message[0] == 1002:
            if message[1] == 1:
                #print('Subscribed to ticker')
                logger.debug('Subscribed to ticker.')

                return

            if message[1] == 0:
                #print('Unsubscribed to ticker')
                logger.debug('Unsubscribed from ticker.')

                return

            data = message[2]

            self.db.update_one({"id": float(data[0])}, {
                "$set": {
                    'last': float(data[1]),
                    'lowestAsk': float(data[2]),
                    'highestBid': float(data[3]),
                    'percentChange': float(data[4]),
                    'baseVolume': float(data[5]),
                    'quoteVolume': float(data[6]),
                    'isFrozen': float(data[7]),
                    'high24hr': float(data[8]),
                    'low24hr': float(data[9])
                }
            },
                               upsert=True)

            self.last_update = time.time()

    def on_error(self, ws, error):
        #print(error)
        logger.error(error)

        slack_message = 'Error returned from websocket connection:\n'
        slack_message += str(error)

        slack_return = self.send_slack_alert(
            channel_id=self.slack_channel_id_alerts,
            message=slack_message)

        logger.debug('slack_return: ' + str(slack_return))

    def on_close(self, ws):
        #print("Websocket closed!")
        logger.debug('Websocket closed.')

        slack_message = 'Websocket closed.'

        slack_return = self.send_slack_alert(
            channel_id=self.slack_channel_id_alerts,
            message=slack_message)

        logger.debug('slack_return: ' + str(slack_return))

    def on_open(self, ws):
        tick = self.api.returnTicker()

        for market in tick:
            self.db.update_one({'_id': market}, {'$set': tick[market]},
                               upsert=True)

        #print('Populated markets database with ticker data')
        logger.debug(
            'Populated markets database with ticker data from REST API.')

        slack_message = 'MongoDB populated with REST API market ticker data.'

        slack_return = self.send_slack_alert(
            channel_id=self.slack_channel_id_alerts,
            message=slack_message)

        logger.debug('slack_return: ' + str(slack_return))

        #self.last_update = datetime.datetime.now()

        self.ws.send(json.dumps({'command': 'subscribe', 'channel': 1002}))

        slack_message = 'Subscribed to ticker websocket.'

        slack_return = self.send_slack_alert(
            channel_id=self.slack_channel_id_alerts,
            message=slack_message)

        logger.debug('slack_return: ' + str(slack_return))

    def start(self):
        self.t = Thread(target=self.ws.run_forever)

        self.t.daemon = True

        self.t.start()

        #print('Thread started')
        logger.debug('Thread started.')

        #slack_message = 'Ticker startup initialized.'
        slack_message = '\n*_Ticker startup initialized at ' + str(
            datetime.datetime.now()) + '._*\n\n'

        slack_return = self.send_slack_alert(
            channel_id=self.slack_channel_id_alerts,
            message=slack_message)

        logger.debug('slack_return: ' + str(slack_return))

    def stop(self):
        self.ws.close()

        self.t.join()

        #print('Thread joined')
        logger.debug('Thread joined.')

        slack_message = '*TICKER SHUTDOWN COMPLETED AT ' + str(
            datetime.datetime.now()) + '.*'

        slack_return = self.send_slack_alert(
            channel_id=self.slack_channel_id_alerts,
            message=slack_message)

        logger.debug('slack_return: ' + str(slack_return))

    def monitor(self, timeout, alert_reset_interval=10):
        error_timeout = timeout

        error_message_sent = False

        error_message_time = None

        error_message_reset = datetime.timedelta(minutes=alert_reset_interval)

        slack_message = '*MONITOR ACTIVE. TICKER READY FOR USE.*'

        slack_return = self.send_slack_alert(
            channel_id=self.slack_channel_id_alerts,
            message=slack_message)

        logger.debug('slack_return: ' + str(slack_return))

        while True:
            try:
                # self.last_update is None until the first tick arrives, so
                # guard against comparing None with a float.
                if self.last_update and (time.time() - self.last_update) > error_timeout:
                    if not error_message_sent:
                        error_message = ('*NO TICKER DATA RECEIVED IN ' +
                                         str(error_timeout) +
                                         ' SECONDS. AN ERROR HAS OCCURRED THAT REQUIRES IMMEDIATE ATTENTION.*')

                        slack_return = self.send_slack_alert(
                            channel_id=self.slack_channel_id_alerts,
                            message=error_message)

                        logger.debug('slack_return: ' + str(slack_return))

                        error_message_sent = True

                        error_message_time = datetime.datetime.now()

                if error_message_sent and (
                        datetime.datetime.now() -
                        error_message_time) > error_message_reset:
                    logger.info(
                        'Resetting error message sent switch to allow another alert.'
                    )

                    error_message_sent = False

                time.sleep(1)

            except Exception as e:
                logger.exception('Exception in inner loop.')
                logger.exception(e)

            except KeyboardInterrupt:
                logger.info(
                    'Exit signal raised in TickerGenerator.monitor. Breaking from monitor loop.'
                )

                break

    def send_slack_alert(self, channel_id, message):
        alert_return = {'Exception': False, 'result': {}}

        try:
            # Use the Slack client handed to __init__ rather than a module
            # global; username and icon remain module-level configuration.
            alert_return['result'] = self.slack_client.api_call(
                'chat.postMessage',
                channel=channel_id,
                text=message,
                username=slack_bot_user,
                icon_url=slack_bot_icon)

        except Exception as e:
            logger.exception(
                'Exception raised in TickerGenerator.send_slack_alert().')
            logger.exception(e)

            alert_return['Exception'] = True

        # Return outside the try block; a return inside finally would mask
        # any unexpected exception.
        return alert_return
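A minimal usage sketch for the class above; the slack_info layout follows what __init__ reads from it, and the client and channel ids are placeholders:

# Hypothetical wiring; slack_client comes from whatever Slack SDK the
# module configures at import time.
slack_info = {
    'client': slack_client,
    'channels': {
        'alerts': ('alerts', 'C0ALERTS'),
        'exceptions': ('exceptions', 'C0EXCEPT'),
    },
}
ticker = TickerGenerator(slack_info, 'mongodb://localhost:27017')
ticker.start()                  # run the websocket in a daemon thread
try:
    ticker.monitor(timeout=30)  # alert if no tick arrives for 30 seconds
finally:
    ticker.stop()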
Ejemplo n.º 38
0
import csv
import re
from bson import SON

from pymongo import MongoClient
import sys

collection = MongoClient().test.cafes

print('Dropping test.cafes collection')
collection.drop()

# Pattern matching simple decimal numbers.
float_pat = r'-?[0-9]+(\.[0-9]+)?'

# Pattern matching addresses, which are like:
# '123 MAIN ST (40.73, -73.98)'
location_pat = re.compile(
    r'(?P<address>(.|\n)+)\n\((?P<lat>%s), (?P<lon>%s)' % (
        float_pat, float_pat),
    re.MULTILINE)

csv_file = csv.DictReader(open('sidewalk-cafes.csv'))
n_lines = 0
batch = []
for line in csv_file:
    location_field = line.pop('Location 1')
    match = location_pat.match(location_field)
    assert match, repr(location_field)
    group_dict = match.groupdict()
    lon, lat = float(group_dict['lon']), float(group_dict['lat'])
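The snippet is cut off here. One plausible continuation, sketched rather than recovered from the original, would store each parsed point as GeoJSON, feed the batch list the script prepares, and bulk-insert:

    # Hypothetical continuation: keep the parsed coordinates as a GeoJSON
    # point and insert in batches of 1000 documents.
    line['location'] = {'type': 'Point', 'coordinates': [lon, lat]}
    batch.append(line)
    n_lines += 1
    if len(batch) == 1000:
        collection.insert_many(batch)
        batch = []

if batch:
    collection.insert_many(batch)
collection.create_index([('location', '2dsphere')])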
Ejemplo n.º 39
0
newsApi = NewsApiClient(api_key='XXXXXXXXXXXXXXXXXXXXXXXXXXXXx')

## Get the top 5 Named Entities
namedEntityCol = MongoClient().precogTask.namedEntityCollection
col = namedEntityCol.find()
# wordList stores the five documents with the most tweets, taken
# directly from namedEntityCollection
wordList = sorted(col, key=lambda k: len(k['tweet']), reverse=True)[:5]

dataForEntityAnalysis = MongoClient().precogTask.dataForAnalysis
dataForEntityAnalysis.drop()

for ne in wordList:
    word = ne['word']
    tweets = ne['tweet']
    articleLinks = []

    allNews = newsApi.get_everything(
        q=word,
        language='en',
        sort_by='relevancy',
    )
    articles = allNews['articles']
    print("----------------------------------------------")
    print(word)
    for article in articles:
Ejemplo n.º 40
0
                    perfcl.update({"_id":tid},{"$set":t})
                    appl = data["appl"]
            t = aiodb.find_one({"key":key,"appl":appl})
            if not t:
                meta = {"key":key,"appl":appl,"order":index}
                aiodb.insert(meta)
            else:
                if t["order"] < index:
                    _id = t["_id"]
                    aiodb.update({"_id":_id},{"$set":{"order":index}})
                    print "all in one update ", index
                    
if __name__ == "__main__":
    print datetime.now(),"start dedupe"
    st = time.time()
    aiodb.drop()
    for i in db.find().sort("index",-1):
        try:
            i["entry_time"]
        except:
            continue
        filename = i["file"]
        index = i["index"]
        try:
            home_date = config.get("base",filename)
        except:
            print "%s not found homedate" % filename
            continue
        home_date = datetime.strptime(home_date,r"%Y/%m/%d")
        exec("mailcl = rpdb.mail%s.mail" % index)
        exec("perfcl = rpdb.mail%s.perf" % index)
Ejemplo n.º 41
0
class TestDataSetIntegration(unittest.TestCase):

    def setUp(self):
        self.storage = MongoStorageEngine.create(HOSTS, PORT, DB_NAME)

        self.config = {
            'name': DATA_SET,
            'data_group': "group",
            'data_type': "type",
            'max_age_expected': 1000,
        }

        self.data_set = DataSet(self.storage, self.config)

        self.mongo_collection = MongoClient(HOSTS, PORT)[DB_NAME][DATA_SET]

    def setup__timestamp_data(self):
        self.mongo_collection.save({
            "_id": 'last',
            "_timestamp": d_tz(2013, 3, 1),
            "_week_start_at": d_tz(2013, 2, 25),
            "_updated_at": d_tz(2013, 8, 10)
        })
        self.mongo_collection.save({
            "_id": 'first',
            "_timestamp": d_tz(2013, 1, 1),
            "_week_start_at": d_tz(2012, 12, 31),
            "_updated_at": d_tz(2013, 9, 10)
        })
        self.mongo_collection.save({
            "_id": 'second',
            "_timestamp": d_tz(2013, 2, 1),
            "_week_start_at": d_tz(2013, 1, 28),
            "_updated_at": d_tz(2013, 10, 10)
        })

    def tearDown(self):
        self.mongo_collection.drop()

    def test_period_queries_get_sorted_by__week_start_at(self):
        self.setup__timestamp_data()
        query = Query.create(period=WEEK)
        result = self.data_set.execute_query(query)
        assert_that(result, contains(
            has_entry('_start_at', d_tz(2012, 12, 31)),
            has_entry('_start_at', d_tz(2013, 1, 28)),
            has_entry('_start_at', d_tz(2013, 2, 25))
        ))

    def test_data_set_is_recent_enough(self):
        self.mongo_collection.save({
            "_id": "first",
            "_updated_at": datetime.datetime.now() - datetime.timedelta(seconds=500)
        })
        assert_that(self.data_set.is_recent_enough())

    def test_data_set_is_not_recent_enough(self):
        self.mongo_collection.save({
            "_id": "first",
            "_updated_at": datetime.datetime.now() - datetime.timedelta(seconds=50000)
        })
        assert_that(not self.data_set.is_recent_enough())
Ejemplo n.º 42
0
class EventsTestMixin(TestCase):
    """
    Helpers and setup for running tests that evaluate events emitted
    """
    def setUp(self):
        super(EventsTestMixin, self).setUp()
        self.event_collection = MongoClient()["test"]["events"]
        self.reset_event_tracking()

    def reset_event_tracking(self):
        """Drop any events that have been collected thus far and start collecting again from scratch."""
        self.event_collection.drop()
        self.start_time = datetime.now()

    @contextmanager
    def capture_events(self, event_filter=None, number_of_matches=1, captured_events=None):
        """
        Context manager that captures all events emitted while executing a particular block.

        All captured events are stored in the list referenced by `captured_events`. Note that this list is appended to
        *in place*. The events will be appended to the list in the order they are emitted.

        The `event_filter` is expected to be a callable that allows you to filter the event stream and select particular
        events of interest. A dictionary `event_filter` is also supported, which simply indicates that the event should
        match the provided expectation.

        `number_of_matches` tells this context manager when enough events have been found and it can move on. The
        context manager will not exit until this many events have passed the filter. If not enough events are found
        before a timeout expires, then this will raise a `BrokenPromise` error. Note that this simply states that
        *at least* this many events have been emitted, so `number_of_matches` is simply a lower bound for the size of
        `captured_events`.
        """
        start_time = datetime.utcnow()

        yield

        events = self.wait_for_events(
            start_time=start_time, event_filter=event_filter, number_of_matches=number_of_matches)

        if captured_events is not None and hasattr(captured_events, 'append') and callable(captured_events.append):
            for event in events:
                captured_events.append(event)

    @contextmanager
    def assert_events_match_during(self, event_filter=None, expected_events=None):
        """
        Context manager that ensures that events matching the `event_filter` and `expected_events` are emitted.

        This context manager will filter out the event stream using the `event_filter` and wait for
        `len(expected_events)` to match the filter.

        It will then compare the events in order with their counterpart in `expected_events` to ensure they match the
        more detailed assertion.

        Typically `event_filter` will be an `event_type` filter and the `expected_events` list will contain more
        detailed assertions.
        """
        captured_events = []
        with self.capture_events(event_filter, len(expected_events), captured_events):
            yield

        self.assert_events_match(expected_events, captured_events)

    def wait_for_events(self, start_time=None, event_filter=None, number_of_matches=1, timeout=None):
        """
        Wait for `number_of_matches` events to pass the `event_filter`.

        By default, this will look at all events that have been emitted since the beginning of the setup of this mixin.
        A custom `start_time` can be specified which will limit the events searched to only those emitted after that
        time.

        The `event_filter` is expected to be a callable that allows you to filter the event stream and select particular
        events of interest. A dictionary `event_filter` is also supported, which simply indicates that the event should
        match the provided expectation.

        `number_of_matches` tells this function when enough events have been found so it can move on. The function will not
        return until this many events have passed the filter. If not enough events are found before a timeout expires,
        then this will raise a `BrokenPromise` error. Note that this simply states that *at least* this many events have
        been emitted, so `number_of_matches` is simply a lower bound for the size of `captured_events`.

        Specifying a custom `timeout` can allow you to extend the default 30 second timeout if necessary.
        """
        if start_time is None:
            start_time = self.start_time

        if timeout is None:
            timeout = 30

        def check_for_matching_events():
            """Gather any events that have been emitted since `start_time`"""
            return self.matching_events_were_emitted(
                start_time=start_time,
                event_filter=event_filter,
                number_of_matches=number_of_matches
            )

        return Promise(
            check_for_matching_events,
            # This is a bit of a hack, Promise calls str(description), so I set the description to an object with a
            # custom __str__ and have it do some intelligent stuff to generate a helpful error message.
            CollectedEventsDescription(
                'Waiting for {number_of_matches} events to match the filter:\n{event_filter}'.format(
                    number_of_matches=number_of_matches,
                    event_filter=self.event_filter_to_descriptive_string(event_filter),
                ),
                functools.partial(self.get_matching_events_from_time, start_time=start_time, event_filter={})
            ),
            timeout=timeout
        ).fulfill()

    def matching_events_were_emitted(self, start_time=None, event_filter=None, number_of_matches=1):
        """Return True if enough events have been emitted that pass the `event_filter` since `start_time`."""
        matching_events = self.get_matching_events_from_time(start_time=start_time, event_filter=event_filter)
        return len(matching_events) >= number_of_matches, matching_events

    def get_matching_events_from_time(self, start_time=None, event_filter=None):
        """
        Return a list of events that pass the `event_filter` and were emitted after `start_time`.

        This function is used internally by most of the other assertions and convenience methods in this class.

        The `event_filter` is expected to be a callable that allows you to filter the event stream and select particular
        events of interest. A dictionary `event_filter` is also supported, which simply indicates that the event should
        match the provided expectation.
        """
        if start_time is None:
            start_time = self.start_time

        if isinstance(event_filter, dict):
            event_filter = functools.partial(is_matching_event, event_filter)
        elif not callable(event_filter):
            raise ValueError(
                'event_filter must either be a dict or a callable function with a single "event" parameter that '
                'returns a boolean value.'
            )

        matching_events = []
        cursor = self.event_collection.find(
            {
                "time": {
                    "$gte": start_time
                }
            }
        ).sort("time", ASCENDING)
        for event in cursor:
            matches = False
            try:
                # Mongo automatically assigns an _id to all events inserted into it. We strip it out here, since
                # we don't care about it.
                del event['_id']
                if event_filter is not None:
                    # Typically we will be grabbing all events of a particular type, however, you can use arbitrary
                    # logic to identify the events that are of interest.
                    matches = event_filter(event)
            except AssertionError:
                # allow the filters to use "assert" to filter out events
                continue
            else:
                if matches is None or matches:
                    matching_events.append(event)
        return matching_events

    def assert_matching_events_were_emitted(self, start_time=None, event_filter=None, number_of_matches=1):
        """Assert that at least `number_of_matches` events have passed the filter since `start_time`."""
        description = CollectedEventsDescription(
            'Not enough events match the filter:\n' + self.event_filter_to_descriptive_string(event_filter),
            functools.partial(self.get_matching_events_from_time, start_time=start_time, event_filter={})
        )

        self.assertTrue(
            self.matching_events_were_emitted(
                start_time=start_time, event_filter=event_filter, number_of_matches=number_of_matches
            ),
            description
        )

    def assert_no_matching_events_were_emitted(self, event_filter, start_time=None):
        """Assert that no events have passed the filter since `start_time`."""
        matching_events = self.get_matching_events_from_time(start_time=start_time, event_filter=event_filter)

        description = CollectedEventsDescription(
            'Events unexpectedly matched the filter:\n' + self.event_filter_to_descriptive_string(event_filter),
            lambda: matching_events
        )

        self.assertEqual(len(matching_events), 0, description)

    def assert_events_match(self, expected_events, actual_events):
        """
        Assert that each item in the expected events sequence matches its counterpart at the same index in the actual
        events sequence.
        """
        for expected_event, actual_event in zip(expected_events, actual_events):
            assert_event_matches(
                expected_event,
                actual_event,
                tolerate=EventMatchTolerates.lenient()
            )

    def relative_path_to_absolute_uri(self, relative_path):
        """Return an aboslute URI given a relative path taking into account the test context."""
        return urlparse.urljoin(BASE_URL, relative_path)

    def event_filter_to_descriptive_string(self, event_filter):
        """Find the source code of the callable or pretty-print the dictionary"""
        message = ''
        if callable(event_filter):
            file_name = '(unknown)'
            try:
                file_name = inspect.getsourcefile(event_filter)
            except TypeError:
                pass

            try:
                list_of_source_lines, line_no = inspect.getsourcelines(event_filter)
            except IOError:
                pass
            else:
                message = '{file_name}:{line_no}\n{hr}\n{event_filter}\n{hr}'.format(
                    event_filter=''.join(list_of_source_lines).rstrip(),
                    file_name=file_name,
                    line_no=line_no,
                    hr='-' * 20,
                )

        if not message:
            message = '{hr}\n{event_filter}\n{hr}'.format(
                event_filter=pprint.pformat(event_filter),
                hr='-' * 20,
            )

        return message
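A usage sketch for the mixin above, as a hypothetical test method on a class that mixes it in; the event type and the page interaction are placeholders:

    def test_video_play_emits_event(self):
        captured_events = []
        with self.capture_events({'event_type': 'play_video'},
                                 number_of_matches=1,
                                 captured_events=captured_events):
            self.video_page.click_play()  # hypothetical page object
        self.assert_events_match([{'event_type': 'play_video'}],
                                 captured_events)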
Ejemplo n.º 43
0
    # ===========================================================================

    # Set up data structures
    text_processor_queue = queue.Queue(BUF_SIZE)
    db = MongoClient()[db][collection]
    model_queue = queue.Queue(1)
    annot_resp = queue.Queue(1)
    te = threading.Event()
    d = corpora.Dictionary()
    mif = queue.Queue(1)
    keyword_queue = queue.Queue(BUF_SIZE)
    lim_queue = queue.Queue(BUF_SIZE)
    mess_queue = queue.Queue(BUF_SIZE)

    # Clear the collection
    db.drop()

    # Set up logging
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s (%(threadName)s) %(message)s',
                        filename='debug.log')

    logging.info('\n' * 5)
    logging.info('*' * 10 + 'ACTIVE STREAM' + '*' * 10)
    logging.info('Starting Application...')

    #for key in logging.Logger.manager.loggerDict:
    #    logging.getLogger(key).setLevel(logging.CRITICAL)

    # Initialize Threads
    streamer = Streamer(name='Streamer',
Ejemplo n.º 44
0
class EventsTestMixin(TestCase):
    """
    Helpers and setup for running tests that evaluate events emitted
    """
    def setUp(self):
        super(EventsTestMixin, self).setUp()
        self.event_collection = MongoClient()["test"]["events"]
        self.reset_event_tracking()

    def reset_event_tracking(self):
        """Drop any events that have been collected thus far and start collecting again from scratch."""
        self.event_collection.drop()
        self.start_time = datetime.now()

    @contextmanager
    def capture_events(self,
                       event_filter=None,
                       number_of_matches=1,
                       captured_events=None):
        """
        Context manager that captures all events emitted while executing a particular block.

        All captured events are stored in the list referenced by `captured_events`. Note that this list is appended to
        *in place*. The events will be appended to the list in the order they are emitted.

        The `event_filter` is expected to be a callable that allows you to filter the event stream and select particular
        events of interest. A dictionary `event_filter` is also supported, which simply indicates that the event should
        match the provided expectation.

        `number_of_matches` tells this context manager when enough events have been found and it can move on. The
        context manager will not exit until this many events have passed the filter. If not enough events are found
        before a timeout expires, then this will raise a `BrokenPromise` error. Note that this simply states that
        *at least* this many events have been emitted, so `number_of_matches` is simply a lower bound for the size of
        `captured_events`.
        """
        start_time = datetime.utcnow()

        yield

        events = self.wait_for_events(start_time=start_time,
                                      event_filter=event_filter,
                                      number_of_matches=number_of_matches)

        if captured_events is not None and hasattr(captured_events,
                                                   'append') and callable(
                                                       captured_events.append):
            for event in events:
                captured_events.append(event)

    @contextmanager
    def assert_events_match_during(self,
                                   event_filter=None,
                                   expected_events=None):
        """
        Context manager that ensures that events matching the `event_filter` and `expected_events` are emitted.

        This context manager will filter out the event stream using the `event_filter` and wait for
        `len(expected_events)` to match the filter.

        It will then compare the events in order with their counterpart in `expected_events` to ensure they match the
        more detailed assertion.

        Typically `event_filter` will be an `event_type` filter and the `expected_events` list will contain more
        detailed assertions.
        """
        captured_events = []
        with self.capture_events(event_filter, len(expected_events),
                                 captured_events):
            yield

        self.assert_events_match(expected_events, captured_events)

    def wait_for_events(self,
                        start_time=None,
                        event_filter=None,
                        number_of_matches=1,
                        timeout=None):
        """
        Wait for `number_of_matches` events to pass the `event_filter`.

        By default, this will look at all events that have been emitted since the beginning of the setup of this mixin.
        A custom `start_time` can be specified which will limit the events searched to only those emitted after that
        time.

        The `event_filter` is expected to be a callable that allows you to filter the event stream and select particular
        events of interest. A dictionary `event_filter` is also supported, which simply indicates that the event should
        match the provided expectation.

        `number_of_matches` tells this function when enough events have been found so it can move on. The function will not
        return until this many events have passed the filter. If not enough events are found before a timeout expires,
        then this will raise a `BrokenPromise` error. Note that this simply states that *at least* this many events have
        been emitted, so `number_of_matches` is simply a lower bound for the size of `captured_events`.

        Specifying a custom `timeout` can allow you to extend the default 30 second timeout if necessary.
        """
        if start_time is None:
            start_time = self.start_time

        if timeout is None:
            timeout = 30

        def check_for_matching_events():
            """Gather any events that have been emitted since `start_time`"""
            return self.matching_events_were_emitted(
                start_time=start_time,
                event_filter=event_filter,
                number_of_matches=number_of_matches)

        return Promise(
            check_for_matching_events,
            # This is a bit of a hack, Promise calls str(description), so I set the description to an object with a
            # custom __str__ and have it do some intelligent stuff to generate a helpful error message.
            CollectedEventsDescription(
                'Waiting for {number_of_matches} events to match the filter:\n{event_filter}'
                .format(
                    number_of_matches=number_of_matches,
                    event_filter=self.event_filter_to_descriptive_string(
                        event_filter),
                ),
                functools.partial(self.get_matching_events_from_time,
                                  start_time=start_time,
                                  event_filter={})),
            timeout=timeout).fulfill()

    def matching_events_were_emitted(self,
                                     start_time=None,
                                     event_filter=None,
                                     number_of_matches=1):
        """Return True if enough events have been emitted that pass the `event_filter` since `start_time`."""
        matching_events = self.get_matching_events_from_time(
            start_time=start_time, event_filter=event_filter)
        return len(matching_events) >= number_of_matches, matching_events

    def get_matching_events_from_time(self,
                                      start_time=None,
                                      event_filter=None):
        """
        Return a list of events that pass the `event_filter` and were emitted after `start_time`.

        This function is used internally by most of the other assertions and convenience methods in this class.

        The `event_filter` is expected to be a callable that allows you to filter the event stream and select particular
        events of interest. A dictionary `event_filter` is also supported, which simply indicates that the event should
        match the provided expectation.
        """
        if start_time is None:
            start_time = self.start_time

        if isinstance(event_filter, dict):
            event_filter = functools.partial(is_matching_event, event_filter)
        elif not callable(event_filter):
            raise ValueError(
                'event_filter must either be a dict or a callable function with a single "event" parameter that '
                'returns a boolean value.')

        matching_events = []
        cursor = self.event_collection.find({
            "time": {
                "$gte": start_time
            }
        }).sort("time", ASCENDING)
        for event in cursor:
            matches = False
            try:
                # Mongo automatically assigns an _id to all events inserted into it. We strip it out here, since
                # we don't care about it.
                del event['_id']
                if event_filter is not None:
                    # Typically we will be grabbing all events of a particular type, however, you can use arbitrary
                    # logic to identify the events that are of interest.
                    matches = event_filter(event)
            except AssertionError:
                # allow the filters to use "assert" to filter out events
                continue
            else:
                if matches is None or matches:
                    matching_events.append(event)
        return matching_events

    def assert_matching_events_were_emitted(self,
                                            start_time=None,
                                            event_filter=None,
                                            number_of_matches=1):
        """Assert that at least `number_of_matches` events have passed the filter since `start_time`."""
        description = CollectedEventsDescription(
            'Not enough events match the filter:\n' +
            self.event_filter_to_descriptive_string(event_filter),
            functools.partial(self.get_matching_events_from_time,
                              start_time=start_time,
                              event_filter={}))

        self.assertTrue(
            self.matching_events_were_emitted(
                start_time=start_time,
                event_filter=event_filter,
                number_of_matches=number_of_matches), description)

    def assert_no_matching_events_were_emitted(self,
                                               event_filter,
                                               start_time=None):
        """Assert that no events have passed the filter since `start_time`."""
        matching_events = self.get_matching_events_from_time(
            start_time=start_time, event_filter=event_filter)

        description = CollectedEventsDescription(
            'Events unexpectedly matched the filter:\n' +
            self.event_filter_to_descriptive_string(event_filter),
            lambda: matching_events)

        self.assertEqual(len(matching_events), 0, description)

    def assert_events_match(self, expected_events, actual_events):
        """
        Assert that each item in the expected events sequence matches its counterpart at the same index in the actual
        events sequence.
        """
        for expected_event, actual_event in zip(expected_events,
                                                actual_events):
            assert_event_matches(expected_event,
                                 actual_event,
                                 tolerate=EventMatchTolerates.lenient())

    def relative_path_to_absolute_uri(self, relative_path):
        """Return an aboslute URI given a relative path taking into account the test context."""
        return urlparse.urljoin(BASE_URL, relative_path)

    def event_filter_to_descriptive_string(self, event_filter):
        """Find the source code of the callable or pretty-print the dictionary"""
        message = ''
        if callable(event_filter):
            file_name = '(unknown)'
            try:
                file_name = inspect.getsourcefile(event_filter)
            except TypeError:
                pass

            try:
                list_of_source_lines, line_no = inspect.getsourcelines(
                    event_filter)
            except IOError:
                pass
            else:
                message = '{file_name}:{line_no}\n{hr}\n{event_filter}\n{hr}'.format(
                    event_filter=''.join(list_of_source_lines).rstrip(),
                    file_name=file_name,
                    line_no=line_no,
                    hr='-' * 20,
                )

        if not message:
            message = '{hr}\n{event_filter}\n{hr}'.format(
                event_filter=pprint.pformat(event_filter),
                hr='-' * 20,
            )

        return message
Ejemplo n.º 45
0
class ModelTestCase(AsyncTestCase):

    def setUp(self):
        super().setUp()
        self.database = "database"
        self.collection = "collection"
        self.model = Model(self.database, self.collection)
        self.client = MongoClient()[self.database][self.collection]

    def tearDown(self):
        self.client.drop()

    def test_setup_database(self):
        self.assertEqual(self.database, self.model.database)

    def test_setup_collection(self):
        self.assertEqual(self.collection, self.model.collection)

    def test_setup_created(self):
        self.assertIsInstance(self.model.data['created'], datetime.datetime)

    def test_setup_updated(self):
        self.assertIsInstance(self.model.data['updated'], datetime.datetime)

    def test_should_not_override_created(self):
        created = datetime.datetime.now()
        model = Model(self.database, self.collection, data={'created': created})
        self.assertEqual(created, model.data['created'])

    def test_should_not_override_updated(self):
        updated = datetime.datetime.now()
        model = Model(self.database, self.collection, data={'updated': updated})
        self.assertEqual(updated, model.data['updated'])

    @gen_test
    async def test_find(self):
        _id = self.client.insert_one({}).inserted_id
        model = await Model.find(self.database, self.collection, _id)
        self.assertEqual(_id, model.data['_id'])

    @gen_test
    async def test_insert(self):
        model = Model(self.database, self.collection)
        await model.insert()
        data = self.client.find_one({'_id': model.data['_id']})
        self.assertEqual(data, model.data)

    @gen_test
    async def test_update(self):
        _id = self.client.insert_one({}).inserted_id
        model = Model(self.database, self.collection, data={'_id': _id})
        expected = model.data.copy()
        expected['name'] = 'batman'
        await model.update({'name': 'batman'})
        self.assertEqual(expected, self.client.find_one({'_id': _id}))

    @gen_test
    async def test_delete(self):
        _id = self.client.insert_one({}).inserted_id
        model = Model(self.database, self.collection, data={'_id': _id})
        await model.delete()
        self.assertFalse(self.client.find_one({'_id': _id}))
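For context, a sketch of the Model interface these tests imply. This is an assumed reconstruction (Motor is assumed for the async MongoDB access), not the module's actual implementation:

import datetime

from motor.motor_tornado import MotorClient


class Model(object):
    def __init__(self, database, collection, data=None):
        self.database = database
        self.collection = collection
        self.data = data if data is not None else {}
        # Drop microseconds so the values survive BSON's millisecond
        # precision round-trip intact.
        now = datetime.datetime.now().replace(microsecond=0)
        self.data.setdefault('created', now)
        self.data.setdefault('updated', now)

    @property
    def _collection(self):
        return MotorClient()[self.database][self.collection]

    @classmethod
    async def find(cls, database, collection, _id):
        data = await MotorClient()[database][collection].find_one({'_id': _id})
        return cls(database, collection, data=data)

    async def insert(self):
        result = await self._collection.insert_one(self.data)
        self.data['_id'] = result.inserted_id

    async def update(self, fields):
        self.data.update(fields)
        # _id is immutable, so exclude it from the $set document.
        to_set = {k: v for k, v in self.data.items() if k != '_id'}
        await self._collection.update_one({'_id': self.data['_id']},
                                          {'$set': to_set})

    async def delete(self):
        await self._collection.delete_one({'_id': self.data['_id']})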
Ejemplo n.º 46
0
                        ret[key_val[0].strip()] = \
                            datetime.datetime.strptime(date, "%Y/%m/%d")
                    else:
                        ret[key_val[0].strip().replace('.', '_')] = \
                            key_val[1].strip()

    cvs_arr_to_upload.append(convert2unicode(copy.deepcopy(ret)))
    if 'date' in ret:
        ret['date'] = temp_date
    return ret


if __name__ == "__main__":

    file_name = '../' + sys.argv[1] + '.txt'
    if sys.argv[1] == 'PolicyCenter7.0': cvs_collection.drop()

    cvs_log_output = open(file_name).readlines()
    file_chunks = get_chunks(cvs_log_output)

    # Go through each file
    for chunk in file_chunks:
        # Get subset of file
        file_lines = cvs_log_output[chunk[0]:chunk[1]]
        # Go through each line of file
        for (i, line) in enumerate(file_lines):
            # A checkin appeared (cvs logs with '---' to break revision blocks)
            if (line.startswith('---')):
                s = i + 1
                e = s
                # Get subset of description block to parse
Ejemplo n.º 47
0
def test_mongo_dump_and_restore(docker_container, tmp_path):
    # Dummy data insertion
    docs = [
        {'name': 'col1doc1'},
        {'name': 'col1doc2'},
        {'name': 'col1doc3'},
    ]
    inserted_doc_ids = None
    port = 27020
    client = MongoClient(f'mongodb://localhost:{port}')
    uri = 'mongodb://localhost/tmpdb'
    dump_path = str(tmp_path / 'dump1.tgz')

    with docker_container('mongo:4.0', ports={'27017/tcp': str(port)}, appdir=str(tmp_path)) as container:  # noqa: E501
        wait_for_mongo_to_be_up(container)
        cmd_prefix = f'docker exec -i {container.id} '
        inserted_doc_ids = client.db1['col1'].insert_many(docs).inserted_ids

        # Get a dump after inserting the documents
        with mongo_utils.mongo_dump(cmd_prefix=cmd_prefix, uri=uri, collection='col1', db='db1') as (stream, stats):  # noqa: E501
            with open(dump_path, 'wb') as fp:
                fp.write(stream.read())

        assert stats.num_docs == 3

        # Doesn't count the number of docs if requested not to
        with mongo_utils.mongo_dump(cmd_prefix=cmd_prefix, uri=uri, collection='col1', db='db1', count=False) as (_, stats):  # noqa: E501
            pass
        assert not stats.num_docs

        # Test if a dummy falsey command throws
        with pytest.raises(Exception) as exc:
            with mongo_utils.mongo_dump(cmd_prefix=cmd_prefix + ' false ', uri=uri, collection='col1', db='db1') as _:  # noqa: E501
                pass
        assert re.search('exited with error code', str(exc))

    with docker_container('mongo:4.0', ports={'27017/tcp': str(port)}, appdir=str(tmp_path)) as container:  # noqa: E501
        wait_for_mongo_to_be_up(container)

        def restore_dump(**kwargs):
            with open(dump_path, 'rb') as fp:
                return mongo_utils.mongo_restore(
                    stream=fp,
                    cmd_prefix=f'docker exec -i {container.id} ',
                    uri=uri,
                    collection='col2',
                    db='db2',
                    **kwargs,
                )

        # Insert one document and check if it wasn't overwritten
        col = MongoClient(f'mongodb://localhost:{port}').db2['col2']
        col.insert_one({
            '_id': inserted_doc_ids[0],
            'name': 'test',
        })
        stats = restore_dump()
        assert {d['name'] for d in col.find()} == {
            'test', 'col1doc2', 'col1doc3',
        }
        assert stats.num_docs == 2

        # Checking if duplicated docs are properly returned
        col.drop()
        col.insert_one({
            '_id': inserted_doc_ids[0],
            'name': 'new doc 1',
        })
        col.insert_one({'name': 'new doc 2'})
        stats = restore_dump()

        assert stats.duplicated_ids == [inserted_doc_ids[0]]

        r = col.delete_many({'_id': {'$in': stats.duplicated_ids}})
        LOGGER.warning(r.raw_result)
        stats = restore_dump()
        assert stats.num_docs == 1
        assert set(stats.duplicated_ids) == set(inserted_doc_ids[1:])
        assert {d['name'] for d in col.find()} == {
            'col1doc1', 'new doc 2', 'col1doc2', 'col1doc3',
        }

        # Now drop the collection
        col.insert_one({'name': 'new doc'})
        stats = restore_dump(drop=True)
        assert {d['name'] for d in col.find()} == {
            'col1doc1', 'col1doc2', 'col1doc3',
        }
        assert stats.num_docs == 3
Ejemplo n.º 48
0
class EventsTestMixin(object):
    """
    Helpers and setup for running tests that evaluate events emitted
    """
    def setUp(self):
        super(EventsTestMixin, self).setUp()
        self.event_collection = MongoClient()["test"]["events"]
        self.reset_event_tracking()

    def assert_event_emitted_num_times(self, event_name, event_time,
                                       event_user_id, num_times_emitted,
                                       **kwargs):
        """
        Tests the number of times a particular event was emitted.

        Extra kwargs get passed to the mongo query in the form: "event.<key>: value".

        :param event_name: Expected event name (e.g., "edx.course.enrollment.activated")
        :param event_time: Latest expected time, after which the event would fire (e.g., the beginning of the test case)
        :param event_user_id: user_id expected in the event
        :param num_times_emitted: number of times the event is expected to appear since the event_time
        """
        find_kwargs = {
            "name": event_name,
            "time": {
                "$gt": event_time
            },
            "event.user_id": int(event_user_id),
        }
        find_kwargs.update(
            {"event.{}".format(key): value
             for key, value in kwargs.items()})
        matching_events = self.event_collection.find(find_kwargs)
        self.assertEqual(matching_events.count(), num_times_emitted,
                         '\n'.join(str(event) for event in matching_events))

    def reset_event_tracking(self):
        """
        Resets all event tracking so that previously captured events are removed.
        """
        self.event_collection.drop()
        self.start_time = datetime.now()

    def get_matching_events(self, username, event_type):
        """
        Returns a cursor over the matching browser events emitted for the specified username.
        """
        return self.event_collection.find({
            "username": username,
            "event_type": event_type,
            "time": {
                "$gt": self.start_time
            },
        })

    def verify_events_of_type(self,
                              username,
                              event_type,
                              expected_events,
                              expected_referers=None):
        """Verify that the expected events of a given type were logged.
        Args:
            username (str): The name of the user for which events will be tested.
            event_type (str): The type of event to be verified.
            expected_events (list): A list of dicts representing the events that should
                have been fired.
            expected_referers (list): A list of strings representing the referers for each event
                that should have been fired (optional). If present, the actual referers are compared
                with this list, checking that each expected referer is a suffix of the corresponding
                actual referer. For example, if one event is expected, specifying ["/account/settings"]
                will verify that the referer for the single event ends with "/account/settings".
        """
        EmptyPromise(
            lambda: self.get_matching_events(username, event_type).count() >= len(expected_events),
            "Waiting for the minimum number of events of type {type} to have been recorded".format(
                type=event_type)
        ).fulfill()

        # Verify that the correct events were fired
        cursor = self.get_matching_events(username, event_type)
        actual_events = []
        actual_referers = []
        for emitted_data in cursor:
            event = emitted_data["event"]
            if emitted_data["event_source"] == "browser":
                event = json.loads(event)
            actual_events.append(event)
            actual_referers.append(emitted_data["referer"])
        self.assertEqual(expected_events, actual_events)
        if expected_referers is not None:
            self.assertEqual(len(expected_referers), len(actual_referers),
                             "Number of expected referers is incorrect")
            for index, actual_referer in enumerate(actual_referers):
                self.assertTrue(
                    actual_referer.endswith(expected_referers[index]),
                    "Refer '{0}' does not have correct suffix, '{1}'.".format(
                        actual_referer, expected_referers[index]))
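A usage sketch for verify_events_of_type, as a hypothetical test method; the username, event type, payloads, and page object are placeholders:

    def test_next_button_is_tracked(self):
        self.courseware_page.click_next()  # hypothetical page object
        self.verify_events_of_type(
            username='test_user',
            event_type='seq_next',
            expected_events=[{'old': 1, 'new': 2}],
            expected_referers=['/courseware'],
        )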
Ejemplo n.º 49
0
# -*- coding: utf-8 -*-
"""
Importazione del database dei ponti italiani.
Pubblicati da IK2ANE in formato Excel, salvare sul file pontixls.csv.
Rimuovere la prima riga (intestazione) e alcune delle ultime righe del file.
Vedere: http://www.ik2ane.it
Autore: Fabio Pani [IZ2UQF] <*****@*****.**>
Licenza d'uso: GNU/GPL v3 (vedere file LICENSE allegato)
"""
from pymongo import MongoClient, GEOSPHERE
from csv import reader
from wwl import is_valid_locator, convert_locator, get_longitude, get_latitude

ponti = MongoClient().hamradio.ponti
ponti.drop()  # rebuild the collection from scratch

with open('pontixls.csv', 'rb') as f:
    csvfile = reader(f)
    for row in csvfile:
        doc = dict(nome=row[0].strip(),
                   frequenza=row[1].strip(),
                   shift=row[2].strip(),
                   tono=row[3].strip(),
                   regione=row[4].strip().lower(),
                   provincia=row[5].strip().upper(),
                   localita=row[6].strip(),
                   gruppo=row[7].strip(),
                   identificatore=row[8].strip(),
                   traslatore=row[9].strip(),
                   locator=row[10].strip(),
                   gestore=row[15].strip())
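The import is cut off above. A sketch of one plausible continuation, using the imported wwl helpers (whose exact signatures are assumed here) and the GEOSPHERE import to build the geospatial index:

        # Hypothetical continuation: derive coordinates from the Maidenhead
        # locator and store them as a GeoJSON point.
        if is_valid_locator(doc['locator']):
            loc = convert_locator(doc['locator'])
            doc['pos'] = {'type': 'Point',
                          'coordinates': [get_longitude(loc), get_latitude(loc)]}
        ponti.insert_one(doc)

ponti.create_index([('pos', GEOSPHERE)])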