Example #1
 def _clean_json(self, field):
     value = self.cleaned_data[field]
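     # Parse only to validate; the original string is returned unchanged.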
     try:
         loads(value)
     except ValueError as exc:
         raise forms.ValidationError(_("Unable to parse JSON: %s") % exc)
     return value
Example #2
        def on_message(self, msg):
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug('Got broker message:[{}]'.format(msg))

            if msg.type == 'message':

                # Replace payload with stuff read off the KVDB in case this is where the actual message happens to reside.
                if msg.channel in NEEDS_TMP_KEY:
                    tmp_key = '{}.tmp'.format(msg.data)

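                    # Claim the key via the Lua rename (Redis runs Lua scripts atomically);
                    # CODE_NO_SUCH_FROM_KEY means the source key already expired.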
                    if self.lua_container.run_lua('zato.rename_if_exists', [msg.data, tmp_key]) == CODE_NO_SUCH_FROM_KEY:
                        payload = None
                    else:
                        payload = self.kvdb.conn.get(tmp_key)
                        self.kvdb.conn.delete(tmp_key)  # Note that it would've expired anyway
                        if not payload:
                            logger.warning('No KVDB payload for key [{}] (already expired?)'.format(tmp_key))
                        else:
                            payload = loads(payload)
                else:
                    payload = loads(msg.data)

                if payload:
                    payload = Bunch(payload)
                    if logger.isEnabledFor(logging.DEBUG):
                        logger.debug('Got broker message payload [{}]'.format(payload))
                        
                    callback = self.topic_callbacks[msg.channel]
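                    # spawn (presumably gevent) runs the callback in a separate greenlet so this listener isn't blocked.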
                    spawn(callback, payload)

                else:
                    if logger.isEnabledFor(logging.DEBUG):
                        logger.debug('No payload in msg:[{}]'.format(msg))
Example #3
    def __init__(self, model):
        self.app = current_app._get_current_object()
        self.name = model.name
        self.task = model.task
        self.schedule = model.schedule
        try:
            self.args = loads(model.args or "[]")
            self.kwargs = loads(model.kwargs or "{}")
        except ValueError:
            logging.error("Failed to deserialize arguments for %s.", self.name, exc_info=1)
            logging.warning("Disabling %s", self.name)
            model.no_changes = True
            model.enabled = False
            model.save()

        self.options = {
            "queue": model.queue,
            "exchange": model.exchange,
            "routing_key": model.routing_key,
            "expires": model.expires,
        }
        self.total_run_count = model.total_run_count
        self.model = model

        if not model.last_run_at:
            model.last_run_at = self._default_now()
        orig = self.last_run_at = model.last_run_at
        if not is_naive(self.last_run_at):
            self.last_run_at = self.last_run_at.replace(tzinfo=None)
        assert orig.hour == self.last_run_at.hour  # timezone sanity
Example #4
 def test_list_tasks(self):
     with self.app.app_context():
         tq = apis.TaskQueue('test')
         for i in range(72):
             tq.add_task({'method': 'GET',
                          'url': 'http://httpbin.org/get'})
     rv = self.client.get('/apps/test/taskqueues/default/tasks?limit=a')
     self.assertEqual(rv.status_code, 422)
     err = anyjson.loads(rv.data)
     self.assertEqual(err['error_code'], 200101)
     self.assertEqual(err['error_desc'], 'Validation failure')
     self.assertEqual(err['request_uri'], 'http://localhost/apps/test/'
                      'taskqueues/default/tasks?limit=a')
     self.assertTrue('error_detail' in err)
     rv = self.client.get('/apps/test/taskqueues/default/tasks')
     self.assertEqual(rv.status_code, 200)
     result = anyjson.loads(rv.data)
     self.assertTrue('items' in result)
     self.assertEqual(result['total'], 72)
     self.assertEqual(len(result['items']), 50)
     self.assertEqual(result['items'][0]['id'], 1)
     self.assertEqual(result['items'][49]['id'], 50)
     self.assertEqual(result['items'][49]['status'], 'new')
     rv = self.client.get('/apps/test/taskqueues/default/tasks?offset=50')
     self.assertEqual(rv.status_code, 200)
     result = anyjson.loads(rv.data)
     self.assertTrue('items' in result)
     self.assertEqual(result['total'], 72)
     self.assertEqual(len(result['items']), 22)
     self.assertEqual(result['items'][0]['id'], 51)
     self.assertEqual(result['items'][21]['id'], 72)
Example #5
def find_user_race():
	with open('data/' + my.DATA_FOLDER + 'user_homes.json', 'rb') as fp:
		homes = anyjson.loads(fp.read())

	global loc_data
	with open('data/' + my.DATA_FOLDER + 'loc_data.json', 'rb') as fp:
		loc_data = anyjson.loads(fp.read())

	#for user in homes.items()[:100]:
	#	_find_race(user)
	pool = Pool(my.PROCESSES)
	user_race = pool.map(_find_race, homes.items())
	user_race = filter(None, user_race)

	races = {
		'w' : [],
		'b' : [],
		'a' : [],
		'h' : [],
		'o' : []
	}
	for user_id, race in user_race:
		races[race].append(user_id)

	with open('data/' + my.DATA_FOLDER + 'user_race.json', 'wb') as fp:
		fp.write(jsb.beautify(anyjson.dumps(races)))
Example #6
 def fetch_nexson(self, study_id, output_filepath=None, store_raw=False):
     '''Calls export_gzipNexSON URL and unzips response.
     Raises HTTP error, gzip module error, or RuntimeError
     '''
     if study_id.startswith('pg_'):
         study_id = study_id[3:] #strip pg_ prefix
     uri = self.domain + '/study/export_gzipNexSON.json/' + study_id
     _LOG.debug('Downloading %s using "%s"\n', study_id, uri)
     resp = requests.get(uri,
                         headers=GZIP_REQUEST_HEADERS,
                         allow_redirects=True)
     resp.raise_for_status()
     results = gzip.GzipFile(mode='rb',
                             fileobj=StringIO(resp.content)).read()
     if is_str_type(results):
         if output_filepath is None:
             return anyjson.loads(results)
         else:
             if store_raw:
                 write_to_filepath(results, output_filepath)
             else:
                 write_as_json(anyjson.loads(results), output_filepath)
             return True
     raise RuntimeError('gzipped response from phylografter export_gzipNexSON.json is not a string:', results)
Example #7
def get_race_points():
	global homes
	with open('data/' + my.DATA_FOLDER + 'user_homes.json', 'rb') as fp:
		homes = anyjson.loads(fp.read())

	with open('data/' + my.DATA_FOLDER + 'user_race.json', 'rb') as fp:
		user_race = anyjson.loads(fp.read())
	user_ids = list(itertools.chain(*user_race.values()))

	global race_lookup
	race_lookup = {}
	for race, uids in user_race.items():
		for uid in uids:
			race_lookup[uid] = race

	pool = Pool(my.PROCESSES)
	points = pool.map(_get_points, user_ids)
	
	race_points = {
		'w': [], 
		'b': [], 
		'a': [], 
		'h': [], 
		'o': []}

	for race, pts in points:
		race_points[race].extend(pts)
	for race in race_points: print race, ':', len(race_points[race])

	with open('data/' + my.DATA_FOLDER + 'race_points.json', 'wb') as fp:
		fp.write(anyjson.dumps(race_points))
Example #8
def _plot_pies(race):
	with open('data/' + my.DATA_FOLDER + 'user_race.json', 'rb') as fp:
		user_ids = anyjson.loads(fp.read())[race]

	with open('data/' + my.DATA_FOLDER + 'visits.json', 'rb') as fp:
		visits = anyjson.loads(fp.read())

	sizes = [0]*6
	for user_id in user_ids:
		v = visits[str(user_id)]
		for i in range(6):
			sizes[i] += v[i]


	labels = ['White', 'Black', 'Asian', 'Hispanic', '', 'None']
	colors = ['lightskyblue', 'yellowgreen', '#C94949', 'orange', '#A65628', '#AAAAAA']

	fig=plt.figure(figsize=(6, 10))
	#fig.set_tight_layout(True)
	plt.subplots_adjust(left=0.125, right=0.875, top=1., bottom=0.)
	ax=fig.add_subplot(211)
	ax.pie(	sizes[:-1], labels=labels[:-1], colors=colors[:-1],
			autopct='%1.1f%%', shadow=True, startangle=90)
	ax.axis('equal')
	ax.axis('off')

	ax=fig.add_subplot(212)
	ax.pie(	sizes, labels=labels, colors=colors,
			autopct='%1.1f%%', shadow=True, startangle=90)
	ax.axis('equal')
	ax.axis('off')

	plt.savefig('data/' + my.DATA_FOLDER + 'visit_pie_' + race + '.png')
Example #9
def _make_volume_mat(id):
	''''''
	print '\n', id, '\n'
	volume_mat = {}
	with open('data/' + my.DATA_FOLDER + 'sets.json', 'rb') as fp:
		sets = anyjson.loads(fp.read()).items()

	with open('data/' + my.DATA_FOLDER + 'time.json', 'rb') as fp:
		time = anyjson.loads(fp.read())
	ts_start = datetime.strptime(time['ts_start'], my.TS_FORMAT)
	ts_end = datetime.strptime(time['ts_end'], my.TS_FORMAT)
	diff = ts_end - ts_start
	diff = int(diff.total_seconds())
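	# Aggregate offsets into fixed 120-second buckets along the timeline.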
	combine = 120
	x, x_ticks, x_tickslabels = _make_x(ts_start, diff, combine=combine)
	
	for pid, st in sets:
		offsets = _get_offsets(id, st)
		print pid, st, len(offsets)
		y = dict((i, 0) for i in x)
		for o in offsets:
			idx = int(o/combine)
			if idx in y: y[idx] += 1
		y = y.values()
		volume_mat[pid] = y

	path = 'data/' + my.DATA_FOLDER + 'volume_mat/'
	if not os.path.exists(path): os.makedirs(path)
	with open(path + str(id) + '.json', 'wb') as fp:
		fp.write(jsb.beautify(anyjson.dumps(volume_mat)))
Example #10
 def test_insert_task(self):
     with self.app.app_context():
         tq = apis.TaskQueue('test')
         tq.add_task({'method': 'GET',
                      'url': 'http://httpbin.org/get'},
                     cname='testtask')
     task_dict = {
         'request': {'url': 'http://httpbin.org/get'},
         'cname': 'testtask',
         'eta': '10:42'
     }
     rv = self.client.post(
         '/apps/test/taskqueues/default/tasks',
         data=anyjson.dumps(task_dict))
     self.assertEqual(rv.status_code, 409)
     self.assertEqual(anyjson.loads(rv.data)['error_code'], 207203)
     task_dict['cname'] = 'testtask1'
     rv = self.client.post(
         '/apps/test/taskqueues/default/tasks',
         data=anyjson.dumps(task_dict))
     self.assertEqual(rv.status_code, 201)
     task = anyjson.loads(rv.data)
     self.assertEqual(task['id'], 2)
     self.assertEqual(task['request']['method'], 'GET')
     self.assertEqual(task['cname'], 'testtask1')
     now = datetime.now()
     eta_expect = utc.normalize(
         get_localzone().localize(
             datetime(now.year, now.month, now.day, 10, 42)
         )
     ).isoformat()
     self.assertEqual(task['eta'], eta_expect)
     self.assertTrue(isinstance(task['countdown'], float))
Example #11
    def __init__(self, model):
        self.app = current_app._get_current_object()
        self.name = model.name
        self.task = model.task
        self.schedule = model.schedule
        try:
            self.args = loads(model.args or '[]')
            self.kwargs = loads(model.kwargs or '{}')
        except ValueError:
            # disable because of error deserializing args/kwargs
            model.no_changes = True
            model.enabled = False
            model.save()
            raise

        self.options = {'queue': model.queue,
                        'exchange': model.exchange,
                        'routing_key': model.routing_key,
                        'expires': model.expires}
        self.total_run_count = model.total_run_count
        self.model = model

        if not model.last_run_at:
            model.last_run_at = self._default_now()
        orig = self.last_run_at = model.last_run_at
        if not is_naive(self.last_run_at):
            self.last_run_at = self.last_run_at.replace(tzinfo=None)
        assert orig.hour == self.last_run_at.hour  # timezone sanity
Example #12
    def set_data(self, payload, has_zato_env):
        response = payload.get('response')
        if response:
            if has_zato_env:
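                # With a zato env the inner response arrives Base64-encoded (Python 2 str.decode('base64')).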
                self.inner_service_response = payload['response'].decode('base64')
                try:
                    data = loads(self.inner_service_response)
                except ValueError:
                    # Not a JSON response
                    self.data = self.inner_service_response
                else:
                    if isinstance(data, dict):
                        data_keys = data.keys()
                        if len(data_keys) == 1:
                            data_key = data_keys[0]
                            if isinstance(data_key, basestring) and data_key.startswith('zato'):
                                self.data = data[data_key]
                            else:
                                self.data = data
                        else:
                            self.data = data
                    else:
                        self.data = data
            else:
                try:
                    data = loads(response)
                except ValueError:
                    # Not a JSON response
                    self.data = response
                else:
                    self.data = data

            return True
Example #13
    def __init__(self, model):
        self.app = current_app._get_current_object()
        self.name = model.name
        self.task = model.task
        try:
            self.schedule = model.schedule
        except model.DoesNotExist:
            logger.error('Schedule was removed from database')
            logger.warning('Disabling %s', self.name)
            self._disable(model)
        try:
            self.args = loads(model.args or '[]')
            self.kwargs = loads(model.kwargs or '{}')
        except ValueError:
            logging.error('Failed to deserialize arguments for %s.', self.name,
                          exc_info=1)
            logging.warning('Disabling %s', self.name)
            self._disable(model)

        self.options = {'queue': model.queue,
                        'exchange': model.exchange,
                        'routing_key': model.routing_key,
                        'expires': model.expires}
        self.total_run_count = model.total_run_count
        self.model = model

        if not model.last_run_at:
            model.last_run_at = self._default_now()
        orig = self.last_run_at = model.last_run_at
        if not is_naive(self.last_run_at):
            self.last_run_at = self.last_run_at.replace(tzinfo=None)
        assert orig.hour == self.last_run_at.hour  # timezone sanity
Example #14
def data_stat():
	with open('data/' + my.DATA_FOLDER + 'tract_population.json', 'rb') as fp:
		population = anyjson.loads(fp.read())
	with open('data/' + my.DATA_FOLDER + 'city_tracts.json', 'rb') as fp:
		polygons = anyjson.loads(fp.read())

	total_pop = []
	total_area = []
	for gid, json_poly in polygons.iteritems():
		json_poly_str = anyjson.dumps(json_poly)
		poly = ogr.CreateGeometryFromJson(json_poly_str)
		total_area.append( poly.GetArea() )

		pop = population[gid]
		total_pop.append( sum(pop) )

	fig = plt.figure(figsize=(10, 10))
	fig.set_tight_layout(True)
	ax = fig.add_subplot(211)
	ax.hist(total_pop, bins=100, range=(0, 10000))
	ax.set_title('Population')

	ax = fig.add_subplot(212)
	ax.hist(total_area, bins=100, range=(0, 0.01))
	ax.set_title('Area')

	plt.savefig('data/' + my.DATA_FOLDER + 'data_stat' + '.pdf')
Example #15
def find_tracts_with_users():
	with open('data/' + my.DATA_FOLDER + 'user_homes.json', 'rb') as fp:
		homes = anyjson.loads(fp.read())
	homes = dict( ( int(h[0]), ogr.CreateGeometryFromWkt('POINT(%s %s)' \
						% (h[1][1], h[1][0])) )  for h in homes.items() )

	with open('data/' + my.DATA_FOLDER + 'city_tracts.json', 'rb') as fp:
		polygons = anyjson.loads(fp.read())

	user_counts = dict( (gid, 0) for gid in polygons.keys())
	users_in_tracts = []
	tracts_with_users = []
	tract_users = dict( (gid, []) for gid in polygons.keys())

	for gid, json_poly in polygons.iteritems():
		json_poly_str = anyjson.dumps(json_poly)
		poly = ogr.CreateGeometryFromJson(json_poly_str)
		
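		# items() returns a list copy under Python 2, so deleting from homes mid-loop is safe.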
		for user_id, h in homes.items():
			if h.Within(poly):
				user_counts[gid] += 1
				users_in_tracts.append(user_id)
				tract_users[gid].append(user_id)
				del homes[user_id]

	tracts_with_users = [gid for gid, v in user_counts.items() if v != 0]

	with open('data/' + my.DATA_FOLDER + 'user_counts.json', 'wb') as fp:
		fp.write( jsb.beautify( anyjson.dumps(user_counts) ) )
	with open('data/' + my.DATA_FOLDER + 'tract_users.json', 'wb') as fp:
		fp.write( jsb.beautify( anyjson.dumps(tract_users) ) )
	with open('data/' + my.DATA_FOLDER + 'users_in_tracts.json', 'wb') as fp:
		fp.write( anyjson.dumps(users_in_tracts) )
	with open('data/' + my.DATA_FOLDER + 'tracts_with_users.json', 'wb') as fp:
		fp.write( anyjson.dumps(tracts_with_users) )
Example #16
 def test_dispatch_task(self):
     tq = TaskQueue('test')
     tq.bind_redis(self.conn1)
     task = Task({'method': 'GET',
                  'url': 'http://httpbin.org'},
                 1)
     idx, task_dict = task._to_redis()
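     # Reach the name-mangled private helper TaskQueue.__metakey from outside the class.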
     metakey = tq._TaskQueue__metakey(idx)
     self.assertTrue(self.conn1.hmset(metakey, task_dict))
     task.bind_taskqueue(tq)
     tq._dispatch_task(task)
     r = self.conn1.hmget(metakey, 'status', 'request',
                          'cname', 'on_complete', 'on_failure',
                          'on_success', 'uuid')
     self.assertEqual(r[0:1] + r[2:-1], [b'"new"', b'null', b'null',
                                         b'"__report__"', b'null'])
     self.assertEqual(anyjson.loads(not_bytes(r[1])),
                      {"url": "http://httpbin.org", "method": "GET"})
     clobj = anyjson.loads(not_bytes(self.conn0.lindex('celery', 0)))
     self.assertEqual(clobj['properties']['correlation_id'],
                      anyjson.loads(not_bytes(r[-1])))
     task = Task({'method': 'GET',
                  'url': 'http://httpbin.org'},
                 2, countdown=10)
     idx, task_dict = task._to_redis()
     metakey = tq._TaskQueue__metakey(idx)
     self.assertTrue(self.conn1.hmset(metakey, task_dict))
     task.bind_taskqueue(tq)
     tq._dispatch_task(task)
     self.assertEqual(self.conn1.hget(metakey, 'status'), b'"delayed"')
Example #17
    def __init__(self, model):
        self.app = current_app._get_current_object()
        self.name = model.name
        self.task = model.task
        self.schedule = model.schedule
        try:
            self.args = loads(model.args or '[]')
            self.kwargs = loads(model.kwargs or '{}')
        except ValueError as exc:
            logging.exception(exc)
            logging.error('Failed to deserialize arguments for %s.', self.name)
            logging.warning('Disabling %s', self.name)
            model.no_changes = True
            model.enabled = False
            model.save()

        self.options = {'queue': model.queue,
                        'exchange': model.exchange,
                        'routing_key': model.routing_key,
                        'expires': model.expires}
        self.total_run_count = model.total_run_count
        self.model = model

        if not model.last_run_at:
            model.last_run_at = self._default_now()
        orig = self.last_run_at = model.last_run_at
        if not is_naive(self.last_run_at):
            self.last_run_at = self.last_run_at.replace(tzinfo=None)
        assert orig.hour == self.last_run_at.hour  # timezone sanity
Example #18
def make_picks_sets():
	''''''
	legend = {}
	sets = {} 
	with open('data/' + my.DATA_FOLDER + 'nba_picks.txt', 'rb') as fp:
		cr = csv.reader(fp, delimiter=',')
		pick_players = [row[0] for row in cr]
	with open('data/' + my.DATA_FOLDER + 'player_legend.json', 'rb') as fp:
		player_legend = anyjson.loads(fp.read()).items()
	with open('data/' + my.DATA_FOLDER + 'player_sets.json', 'rb') as fp:
		player_sets = anyjson.loads(fp.read())

	id = 0
	for name in pick_players:
		id += 1
		screen_name = None
		pid = 0
		for id2, n2 in player_legend:
			if _match_names(name, n2):
				player_legend.remove((id2, n2))
				screen_name = n2
				pid = id2
		#print name, '\t', screen_name, pid
		legend[id] = screen_name
		sets[id] = player_sets[pid]

	with open('data/' + my.DATA_FOLDER + 'picks_legend.json', 'wb') as fp:
		fp.write(anyjson.dumps(legend))
	with open('data/' + my.DATA_FOLDER + 'picks_sets.json', 'wb') as fp:
		fp.write(anyjson.dumps(sets))
Example #19
 def test_scheduled_task(self):
     task_dict = {
         'request': {'url': 'http://httpbin.org/get'},
         'schedule': '1234567',
         'cname': 'haha'
     }
     rv = self.client.post(
         '/apps/test/taskqueues/default/tasks',
         data=anyjson.dumps(task_dict))
     self.assertEqual(rv.status_code, 422)
     task_dict['schedule'] = 'every 30 second'
     task_dict.pop('cname')
     rv = self.client.post(
         '/apps/test/taskqueues/default/tasks',
         data=anyjson.dumps(task_dict))
     self.assertEqual(rv.status_code, 422)
     task_dict['cname'] = 'test schedule'
     rv = self.client.post(
         '/apps/test/taskqueues/default/tasks',
         data=anyjson.dumps(task_dict))
     self.assertEqual(rv.status_code, 201)
     task = anyjson.loads(rv.data)
     self.assertEqual(task['id'], 1)
     self.assertEqual(task['schedule'], 'every 30.0 seconds')
     task_dict['schedule'] = '*/1 1-5,8 * * *'
     task_dict['cname'] = 'test crontab'
     rv = self.client.post(
         '/apps/test/taskqueues/default/tasks',
         data=anyjson.dumps(task_dict))
     self.assertEqual(rv.status_code, 201)
     task = anyjson.loads(rv.data)
     self.assertEqual(task['id'], 2)
     self.assertEqual(task['schedule'], '*/1 1-5,8 * * *')
Example #20
def make_artificial_tweets():
	with open('data/' + my.DATA_FOLDER + 'user_homes.json', 'rb') as fp:
		user_homes = anyjson.loads(fp.read())
	path = 'data/' + my.DATA_FOLDER + 'data/'
	with open(path + 'artificial_indexes.json', 'rb') as fp:
		indexes = anyjson.loads(fp.read())

	with open('data/' + my.DATA_FOLDER + 'artificial/' + \
				'artificial_points.json', 'rb') as fp:
		artificial_points = anyjson.loads(fp.read())
	shuffle(artificial_points)

	tweets = {}
	for user_id, index in indexes.iteritems():
		tweets[user_id] = []
		home = user_homes[str(user_id)]
		home = ( round(home[0], 4), round(home[1], 4))
		for i in index:
			tweets[user_id].append((home[0] + artificial_points[i][0],
									home[1] + artificial_points[i][1]))
		print user_id, len(tweets[user_id])

	with open(path + 'artificial_tweets.json', 'wb') as fp:
		fp.write( anyjson.dumps(tweets) )

	tweets = list(itertools.chain(*tweets.values()))

	with open(path + 'artificial_all_tweets.json', 'wb') as fp:
		fp.write( anyjson.dumps(tweets) )
Example #21
 def on_message(self, msg):
     if logger.isEnabledFor(logging.DEBUG):
         logger.debug('Got broker message:[{}]'.format(msg))
     
     if msg.type == 'message':
 
         # Replace payload with stuff read off the KVDB in case this is where the actual message happens to reside.
         if msg.channel == self._to_parallel_any_topic:
             tmp_key = '{}.tmp'.format(msg.data)
             
             try:
                 self.kvdb.conn.rename(msg.data, tmp_key)
             except redis.ResponseError as e:
                 if e.message != 'ERR no such key': # Doh, I hope the Redis folks don't change this all of a sudden :/
                     raise
                 else:
                     payload = None
             else:
                 payload = self.kvdb.conn.get(tmp_key)
                 self.kvdb.conn.delete(tmp_key) # Note that it would've expired anyway
                 if not payload:
                     logger.warning('No KVDB payload for key [{}] (already expired?)'.format(tmp_key))
                 else:
                     payload = loads(payload)
         else:
             payload = loads(msg.data)
             
         if payload:
             payload = Bunch(payload)
             if logger.isEnabledFor(logging.DEBUG):
                 logger.debug('Got broker message payload [{}]'.format(payload))
                 
             return self.topic_callbacks[msg.channel](payload)
Example #22
def _generate_mat_unigram_pos(name):
	# Generate unigram with POS tag feature matrix
	lex_dir = 'data/' + my.DATA_FOLDER + '_lexicon/' + name + '/'
	with open(lex_dir + 'lexicon_unigram_pos', 'r') as fp1:
		lexicon = anyjson.loads(fp1.read())

	doc_path = 'data/' + my.DATA_FOLDER + '_tokenized/' + name + '/'
	doc_names = os.listdir(doc_path)
	if '.DS_Store' in doc_names:
		doc_names.remove('.DS_Store')

	mat = []
	for doc_id in doc_names:
		doc = []
		with open(doc_path + doc_id, 'r') as fp1:
			sentences = anyjson.loads(fp1.read())
		sentences = [['%s__%s' % (token[0].lower(), token[1]) for token in sent] for sent in sentences]
		for sent in sentences:
			doc.extend(sent)
		fdist = nltk.FreqDist(doc)
		row = [fdist[token] if token in fdist else 0 for token in lexicon]
		mat.append(row)

	out_dir = 'data/' + my.DATA_FOLDER + '_feature_mat/' + name + '/'
	if not os.path.exists(out_dir):
		os.makedirs(out_dir)
	with open(out_dir + 'unigram_pos', 'w') as fp1:
		fp1.write(anyjson.dumps(mat))
Example #23
    def render_GET(self, request):
        try:
            # log.msg("Handle request: %s" % request.path, logLevel=logging.DEBUG)
            request.setHeader("Content-Type", "application/json")

            if request.path == "/add/":
                params = json.loads(request.args["data"][0])
                response = self.__add_scrapers__(params)
                return json.dumps(response)

            elif request.path == "/list/":
                response = self.__list_scrapers__()
                return json.dumps(response)

            elif request.path == "/remove/":
                params = json.loads(request.args["data"][0])
                response = self.__remove_scrapers__(params)
                return json.dumps(response)
            elif request.path == "/ping/":
                return "pong"
            elif request.path == "/log/":
                with open("log/daily-log.log") as logfile:
                    log_message = logfile.read()
                return log_message
            else:
                # log.msg("Wrong API path '%s'" % request.path,logLevel=logging.DEBUG)
                return json.dumps({"error": True, "message": "Wrong API path '%s'" % request.path})

        except Exception:
            # log.msg("Error: %s" % traceback.format_exc(),logLevel=logging.WARNING)
            return json.dumps({"error": True, "message": traceback.format_exc()})
Example #24
def plot_top_players():
	''''''
	with open('data/' + my.DATA_FOLDER + 'player_legend.json', 'rb') as fp:
		legend = anyjson.loads(fp.read())
	with open('data/' + my.DATA_FOLDER + 'player_counts.json', 'rb') as fp:
		counts = anyjson.loads(fp.read())
	counts = sorted(counts.items(), key=lambda x: x[1], reverse=True)

	x = [x[1] for x in counts[:my.TOP_N]]
	labels = list(legend['legend'][x[0]] for x in counts[:my.TOP_N])
	#labels = list(x[0] for x in counts[:my.TOP_N])
	others = sum(x[1] for x in counts[my.TOP_N:])
	x.append(others)
	labels.append('Others')
	my_norm = matplotlib.colors.Normalize(min(x), max(x[:-1])+x[1])
	my_cmap = matplotlib.cm.get_cmap('Set2')
	
	fig = plt.figure(figsize=(15,15))
	#fig.set_tight_layout(True)
	ax = fig.add_subplot(111) 
	ax.autoscale_view()
	ax.pie(x, labels=labels, colors=my_cmap(my_norm(x)), 
			startangle=-50)

	filename = 'top_' + str(my.TOP_N) + '_players'
	plt.savefig('data/' + my.DATA_FOLDER + filename + '.' + 'pdf')

	tex = []
	for i in range(len(x)):
		tex.append(' & ' + str(i + 1) + '\t & ' \
			+ labels[i] + '\t & ' \
			+ '{:,d}'.format(x[i]) + '\t \\\ ')
	with open('data/' + my.DATA_FOLDER + 'tex_' + filename + '.tex', 'wb') as fp:
		fp.write('\n'.join(tex))
Example #25
    def run(self, name, *_, **kw):
        # Positional args.
        args = kw.get("args") or ()
        if isinstance(args, basestring):
            args = anyjson.loads(args)

        # Keyword args.
        kwargs = kw.get("kwargs") or {}
        if isinstance(kwargs, basestring):
            kwargs = anyjson.loads(kwargs)

        # Expires can be int/float.
        expires = kw.get("expires") or None
        try:
            expires = float(expires)
        except (TypeError, ValueError):
            # or a string describing an ISO 8601 datetime.
            try:
                expires = maybe_iso8601(expires)
            except (TypeError, ValueError):
                raise

        res = self.app.send_task(name, args=args, kwargs=kwargs,
                                 countdown=kw.get("countdown"),
                                 serializer=kw.get("serializer"),
                                 queue=kw.get("queue"),
                                 exchange=kw.get("exchange"),
                                 routing_key=kw.get("routing_key"),
                                 eta=maybe_iso8601(kw.get("eta")),
                                 expires=expires)
        self.out(res.id)
Example #26
    def message(self):
        from ..utils import replace_cid_and_change_headers

        to = anyjson.loads(self.to)
        cc = anyjson.loads(self.cc)
        bcc = anyjson.loads(self.bcc)

        html, text, inline_headers = replace_cid_and_change_headers(self.body, self.original_message_id)

        email_message = SafeMIMEMultipart('related')
        email_message['Subject'] = self.subject
        email_message['From'] = self.send_from.to_header()

        if to:
            email_message['To'] = ','.join(list(to))
        if cc:
            email_message['cc'] = ','.join(list(cc))
        if bcc:
            email_message['bcc'] = ','.join(list(bcc))

        email_message_alternative = SafeMIMEMultipart('alternative')
        email_message.attach(email_message_alternative)

        email_message_text = SafeMIMEText(text, 'plain', 'utf-8')
        email_message_alternative.attach(email_message_text)

        email_message_html = SafeMIMEText(html, 'html', 'utf-8')
        email_message_alternative.attach(email_message_html)

        try:
            add_attachments_to_email(self, email_message, inline_headers)
        except IOError:
            return False

        return email_message
Example #27
def check_tweet_counts():
	path = 'data/' + my.DATA_FOLDER + 'data/'
	with open(path + 'all_tweets.json', 'rb') as fp:
		tweets = anyjson.loads(fp.read())
	print len(tweets)
	with open(path + 'artificial_all_tweets.json', 'rb') as fp:
		artificial_tweets = anyjson.loads(fp.read())
	print len(artificial_tweets)
Example #28
 def _clean_json(self, field):
     value = self.cleaned_data[field]
     try:
         loads(value)
     except ValueError as exc:
         raise forms.ValidationError(
             _('Unable to parse JSON: %s') % exc,
         )
     return value
Example #29
def find_race_visits():
	with open('data/' + my.DATA_FOLDER + 'data/' + 'gids.json', 'rb') as fp:
		gids = anyjson.loads(fp.read())
	with open('data/' + my.DATA_FOLDER + 'tract_race.json', 'rb') as fp:
		tract_race = anyjson.loads(fp.read())

	race_idx = {'w': [], 'b': [], 'a': [], 'h': [], 'o': []}
	for race in race_idx.keys():
		this_race_tracts = [g for g, r in tract_race.iteritems() if r==race]
		race_idx[race] = [gids.index(g) for g in this_race_tracts if g in gids]
	print dict((r, len(i)) for r, i in race_idx.iteritems())

	path = 'data/' + my.DATA_FOLDER + 'data/'
	with open(path + 'visit_mat_tweets' + '.json', 'rb') as fp:
		visit_mat = anyjson.loads(fp.read())
	with open(path + 'visit_mat_artificial_tweets' + '.json', 'rb') as fp:
		visit_mat_arti = anyjson.loads(fp.read())

	lookup = {'w': 0, 'b': 1, 'a': 2, 'h': 3, 'o': 4}
	colors = [ my.COLOR[r] for r in ['w', 'b', 'a', 'h', 'o'] ]
	
	for race in ['w', 'b', 'a', 'h']:
		sizes = [0]*5
		sizes_arti = [0]*5

		for idx_from in race_idx[race]:
			for race_to in ['w', 'b', 'a', 'h']:
				for idx_to in race_idx[race_to]:
					if idx_from != idx_to:
						sizes[ lookup[race_to] ] += visit_mat[idx_from][idx_to]
						sizes_arti[ lookup[race_to] ] += \
										visit_mat_arti[idx_from][idx_to]
		sizes = [round(float(s)/sum(sizes), 4) for s in sizes]
		sizes_arti = [round(float(s)/sum(sizes_arti), 4) for s in sizes_arti]
		print sizes
		print sizes_arti

		# Plot
		#
		fig=plt.figure()
		fig.set_tight_layout(True)
		ax=fig.add_subplot(111)

		ind = np.arange(5)
		width = 0.35
		rects1 = ax.bar(ind, sizes, width, color=colors)
		rects2 = ax.bar(ind+width, sizes_arti, width, color=colors, edgecolor=colors, alpha=0.45)
		ax.set_ylabel('Visit fraction')
		ax.set_ylim(0,1)
		ax.set_title('Race visits by ' + race.upper())
		ax.set_xticks(ind+width)
		ax.set_xticklabels( ('W', 'B', 'A', 'H', 'O') )
		ax.legend(	(rects1[lookup[race]], rects2[lookup[race]]),
					('Actual', 'Artificial') )

		path = 'data/' + my.DATA_FOLDER + 'visits/'
		if not os.path.exists(path): os.makedirs(path)
		plt.savefig(path + 'race_visits_' + race + '.pdf')
Example #30
    def cluster_status(self, cluster, host, port, extended):
        try:
            request_health_url = 'http://%s:%s/_cluster/health?level=indices' \
                    % (host, port)
            request_health = requests.get(request_health_url)
            request_health.raise_for_status()

            request_state_url = 'http://%s:%s/_cluster/state' % (host, port)
            request_state = requests.get(request_state_url)
            request_state.raise_for_status()

            print 'SUCCESS: Fetching Cluster Status : "%s"\n' % (cluster)
            result_state = json.loads(request_state.content)
            result_health = json.loads(request_health.content)[u'indices']
            master_node = result_state[u'master_node']
            master_node_state = result_state[u'nodes'][master_node]

            print '\t Information:'
            print '\t\t Cluster Name: %s' % (result_state[u'cluster_name'])
            print '\t\t Master Node: %s' % (result_state[u'master_node'])
            if extended:
                print '\t\t\t Name: %s' % (master_node_state[u'name'])
                print '\t\t\t Transport Address: %s' % \
                        (master_node_state[u'transport_address'])

            print '\t Indices:'
            for index in result_state[u'metadata'][u'indices']:
                print '\t\t Name: %s' % (index)
                if extended:
                    index_result = result_state[u'metadata'][u'indices'][index]
                    print '\t\t\t State: %s' % (index_result[u'state'])
                    print '\t\t\t Replicas: %s' % \
                            (index_result[u'settings']
                                    [u'index.number_of_replicas'])
                    print '\t\t\t Shards: %s' % \
                            (index_result[u'settings']
                                    [u'index.number_of_shards'])
                    if index_result[u'state'] == 'close':
                        print '\t\t\t Status: CLOSED'
                    else:
                        print '\t\t\t Status: %s' % \
                                (result_health[index][u'status'])

            print '\t Nodes:'
            for node in result_state[u'nodes']:
                print '\t\t Node: %s' % (node)
                if extended:
                    print '\t\t\t Name: %s' % \
                            (result_state[u'nodes'][node][u'name'])
                    print '\t\t\t Transport Address: %s' % \
                            (result_state[u'nodes'][node]
                                    [u'transport_address'])

        except (requests.RequestException, urllib2.HTTPError) as e:
            msg = 'Error Fetching Cluster Status - %s' % (e)
            raise exceptions.ActionClusterError(msg)
Example #31
    def request(self,
                url,
                method='get',
                data=None,
                files=None,
                raw=False,
                headers=dict(),
                raise_for=dict()):
        if self.debug_requests:
            url_debug = _dump_trunc(url)
            log.debug(
                'HTTP request: {} {} (h: {}, data: {}, files: {}), raw: {}'.
                format(method, url_debug, headers, _dump_trunc(data),
                       _dump_trunc(files), raw))

        timeout = HTTPTimeout(**self.request_io_timeouts)

        method, body = method.lower(), None
        headers = dict() if not headers else headers.copy()
        headers.setdefault('User-Agent', 'txOneDrive')

        if data is not None:
            if method == 'post':
                headers.setdefault('Content-Type',
                                   'application/x-www-form-urlencoded')
                body = FileBodyProducer(io.BytesIO(urlencode(data)),
                                        timer=timeout)
            else:
                headers.setdefault('Content-Type', 'application/json')
                body = FileBodyProducer(io.BytesIO(json.dumps(data)),
                                        timer=timeout)

        if files is not None:
            boundary = os.urandom(16).encode('hex')
            headers.setdefault(
                'Content-Type',
                'multipart/form-data; boundary={}'.format(boundary))
            body = MultipartDataSender(files, boundary, timer=timeout)
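            # Pre-compute the multipart body length before the request is sent.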
            yield body.calculate_length()

        if isinstance(url, unicode): url = url.encode('utf-8')
        if isinstance(method, unicode): method = method.encode('ascii')

        res_deferred = first_result(
            timeout,
            self.request_agent.request(
                method.upper(), url,
                Headers(
                    dict(
                        (k, [v]) for k, v in (headers or dict()).viewitems())),
                body))
        code = res_body = None
        try:
            res = yield res_deferred
            code = res.code
            if code == http.NO_CONTENT: defer.returnValue(None)
            if code not in [http.OK, http.CREATED]:
                raise ProtocolError(code, res.phrase)

            res_body = defer.Deferred()
            res.deliverBody(DataReceiver(res_body, timer=timeout))
            res_body = yield first_result(timeout, res_body)

            if self.debug_requests:
                log.debug( 'HTTP request done ({} {}): {} {} {}, body_len: {}'\
                 .format(method, url_debug, code, res.phrase, res.version, len(res_body)) )
            defer.returnValue(json.loads(res_body) if not raw else res_body)

        except (timeout.ActivityTimeout, TimeoutError, ResponseFailed,
                RequestNotSent, RequestTransmissionFailed) as err:
            if isinstance(err, timeout.ActivityTimeout):
                if not res_deferred.called: res_deferred.cancel()
                if res_body and not res_body.called: res_body.cancel()
            if self.debug_requests:
                log.debug(
                 'HTTP transport (underlying protocol) error ({} {}): {}'\
                 .format(method, url_debug, err.message or repr(err.args)) )
            raise UnderlyingProtocolError(err)

        except ProtocolError as err:
            if self.debug_requests:
                log.debug(
                 'HTTP request handling error ({} {}, code: {}): {}'\
                 .format(method, url_debug, code, err.message) )
            raise raise_for.get(code, ProtocolError)(code, err.message)

        except RequestGenerationFailed as err:
            err[0][0].raiseException()

        finally:
            timeout.state_finished()
Example #32
 def _get(self, queue):
     #self.refresh_connection()
     m = Queue.objects.fetch(queue)
     if m:
         return loads(m)
     raise Empty()
Example #33
def overview(req, service_name):
    cluster_id = req.GET.get('cluster')
    service = None

    create_form = CreateForm()
    edit_form = EditForm(prefix='edit')

    if cluster_id and req.method == 'GET':

        input_dict = {'name': service_name, 'cluster_id': req.zato.cluster_id}

        response = req.zato.client.invoke('zato.service.get-by-name',
                                          input_dict)
        if response.has_data:
            service = Service()

            for name in ('id', 'name', 'is_active', 'impl_name', 'is_internal',
                         'usage', 'time_last', 'time_min_all_time',
                         'time_max_all_time', 'time_mean_all_time'):

                value = getattr(response.data, name)
                if name in ('is_active', 'is_internal'):
                    value = is_boolean(value)

                setattr(service, name, value)

            now = datetime.utcnow()
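            # Query usage statistics over the preceding 60 minutes.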
            start = now + relativedelta(minutes=-60)

            response = req.zato.client.invoke('zato.stats.get-by-service', {
                'service_id': service.id,
                'start': start,
                'stop': now
            })
            if response.has_data:
                for name in ('mean_trend', 'usage_trend', 'min_resp_time',
                             'max_resp_time', 'mean', 'usage', 'rate'):
                    value = getattr(response.data, name)
                    if not value or value == ZATO_NONE:
                        value = ''

                    setattr(service, 'time_{}_1h'.format(name), value)

            for channel_type in ('plain_http', 'soap', 'amqp', 'jms-wmq',
                                 'zmq'):
                channels = _get_channels(req.zato.client, req.zato.cluster,
                                         service.id, channel_type)
                getattr(service,
                        channel_type.replace('jms-', '') +
                        '_channels').extend(channels)

            for item in req.zato.client.invoke(
                    'zato.service.get-deployment-info-list',
                {'id': service.id}):
                service.deployment_info.append(
                    DeploymentInfo(item.server_name, loads(item.details)))

            # TODO: There needs to be a new service added zato.service.scheduler.job.get-by-service
            #       or .get-list should start accepting a service name. Right now we pull all the
            #       jobs which is suboptimal.
            response = req.zato.client.invoke('zato.scheduler.job.get-list',
                                              {'cluster_id': cluster_id})
            if response.has_data:
                for item in response.data:
                    if item.service_name == service_name:
                        url = reverse('scheduler')
                        url += '?cluster={}'.format(cluster_id)
                        url += '&highlight={}'.format(item.id)
                        service.scheduler_jobs.append(
                            ExposedThrough(item.id, item.name, url))

    return_data = {
        'zato_clusters': req.zato.clusters,
        'service': service,
        'cluster_id': cluster_id,
        'choose_cluster_form': req.zato.choose_cluster_form,
        'create_form': create_form,
        'edit_form': edit_form,
    }

    return TemplateResponse(req, 'zato/service/overview.html', return_data)
Example #34
def _load_resources():
    global resource_paths

    with open(os.path.join(macro.MACRO_APP_ROOT, 'build/rev-manifest.json'),
              'r') as f:
        resource_paths = json.loads(f.read())
Example #35
 def test_loads(self):
     self.assertEqual(JSON_OBJECT, anyjson.loads(JSON_STRING))
Example #36
def read_settings(filepath="scrapy-settings.json"):
    with open(filepath, "r") as json_file:
        return json.loads(json_file.read())
Example #37
 def load_func(self, data):
     return loads(data)