Example #1
 def GenCatalogEntry(row):
   i_deps = cjson.decode(row[7])
   if i_deps:
     i_deps_str = "|".join(i_deps)
   else:
     i_deps_str = "none"
   deps_with_desc = cjson.decode(row[6])
   deps = [x[0] for x in deps_with_desc if x[0].startswith('CSW')]
   if deps:
     deps_str = '|'.join(deps)
   else:
     deps_str = "none"
   entry = representations.CatalogEntry(
       catalogname=row[0],  # 0
       version=row[1],      # 1
       pkgname=row[2],      # 2
       basename=row[3],     # 3
       md5_sum=row[4],      # 4
       size=str(row[5]),    # 5
       deps=deps_str,       # 6
       category="none",     # 7
       i_deps=i_deps_str,   # 8
       desc=row[8], # 9
   )
   return entry
Example #2
 def transform_graph(self):
     """
      Transforms ids to the corresponding screen_name in the user_graph.txt file.
     """
     f = open("../semantic_analysis/data/labelled_user_dic_393.json", "r")
     user_dic = cjson.decode(f.readline())
     f.close()
     
     f = open("../semantic_analysis/data/labelled_id_name_map_393.json", "r")
     id_label_map = cjson.decode(f.readline())
     f.close()
     
     node_list, edge_list = self.read_node_edge_lists("user_graph/user_graph_393_2.txt")
     
     f = open("user_graph/user_graph_393_screen_name_2.txt","w")
     f.write("#nodes-screen_name\n")
     for user in user_dic:
         f.write(user+" "+user_dic[user]["label"]+"\n")
     f.write("#edges-screen_name\n")    
     for edge in edge_list:
         edge_with_name = []
         flag = 0
         for node in edge:
             if id_label_map.has_key(node):
                 edge_with_name.append(id_label_map[node]["screen_name"])
             else:
                 flag = 1
                 break
         if flag == 0:
             f.write(edge_with_name[0]+" "+edge_with_name[1]+"\n")
     f.close()
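
transform_graph depends on a read_node_edge_lists helper that is not shown. A minimal sketch, assuming the same plain "#nodes"/"#edges" section format the method itself writes (the parsing details are assumptions):

 def read_node_edge_lists(self, path):
     # Hypothetical helper: the graph file is expected to hold a "#nodes"
     # header followed by one id per line, then an "#edges" header followed
     # by "src dst" pairs.
     node_list, edge_list = [], []
     section = None
     for line in open(path):
         line = line.strip()
         if line.startswith("#nodes"):
             section = "nodes"
         elif line.startswith("#edges"):
             section = "edges"
         elif section == "nodes" and line:
             node_list.append(line)
         elif section == "edges" and line:
             edge_list.append(line.split())
     return node_list, edge_list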
Example #3
def json_parser(source, logger=None):
    """
    JSON parser based on json module. It accepts either source
    descriptor with .read()-supported file-like object or
    data as a string object.
    """
    if  isinstance(source, InstanceType) or isinstance(source, file):
        # got data descriptor
        try:
            jsondict = cjson.decode(source.read())
        except Exception as exc:
            print_exc(exc)
            source.close()
            raise
        source.close()
    else:
        data = source
        # to prevent unicode/ascii errors like
        # UnicodeDecodeError: 'utf8' codec can't decode byte 0xbf in position
        # if  isinstance(data, str):
        #    data = unicode(data, errors='ignore')
        #    res  = data.replace('null', '\"null\"')
        #else:
        res  = data
        try:
            jsondict = cjson.decode(res)
        except:
            msg  = "json_parser, WARNING: failed to JSON'ify data:"
            msg += "\n%s\ndata type %s" % (res, type(res))
            if  logger:
                logger.warning(msg)
            else:
                print msg
            jsondict = eval(res, { "__builtins__": None }, {})
    yield jsondict
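
json_parser is a generator, so callers iterate it even for a single document. A usage sketch (hypothetical payload):

for record in json_parser('{"task": "reco", "status": "ok"}'):
    print record["status"]   # -> ok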
Example #4
	def test_31_exports(self):
		# public key
		(resp, data) = self.client.request('GET', '/openssl_export', {'name': 'usercert1.pub'})
		self.assertEqual(resp.status, 200)

		try:
			data = cjson.decode(data)
		except Exception:
			self.fail('cannot decode cjson data')

		self.assertTrue(data.startswith('-----BEGIN PUBLIC KEY-----'))

		# private key
		(resp, data) = self.client.request('GET', '/openssl_export', {'name': 'usercert1.key'})
		self.assertEqual(resp.status, 200)

		try:
			data = cjson.decode(data)
		except Exception:
			self.fail('cannot decode cjson data')

		self.assertFalse(data.startswith('-----BEGIN PUBLIC KEY-----'))

		# limit cases
		#		. no name
		(resp, data) = self.client.request('GET', '/openssl_export', {})
		self.assertEqual(resp.status, 400)

		# 	. file not found
		(resp, data) = self.client.request('GET', '/openssl_export', {'name': 'foo.bar'})
		self.assertEqual(resp.status, 404)
Example #5
def process_user_graph():
    f = open("../user_dic/user_dic_09_wids_2.json","r")
    user_dic = cjson.decode(f.readline())
    user_ids = set()
    for user in user_dic:
        user_ids.add(str(user_dic[user]["id"]))
    print len(user_dic)
    print len(user_ids)
    
    f.close()
    fo = open("../social_graph/graph_reduced.txt","w")
    fo.write("#nodes\n")
    #for user in user_dic:
    #    fo.write(str(user_dic[user]["id"])+"\n")
    for user in user_ids:
        fo.write(user+"\n")
    
    fo.write("#edges\n")
    f = gzip.open("../social_graph/user_friends_09_total.json.gz")
    user_friends = cjson.decode(f.readline())
    print len(user_friends)

    for user in user_friends:
        if user in user_ids:
            for friend in user_friends[user]:
                if str(friend) in user_ids:
                    fo.write(user+" "+str(friend)+"\n")
                else:
                    pass
    f.close()
    fo.close()
Example #6
def pre_process():
    """
    Dump user friends for whole month for September
    """
    user_friends = {}
    f = gzip.open("../social_graph/user_friends_09.json.gz", "r")
    for line in f:
        data = cjson.decode(line)
        for key in data:
            if not user_friends.has_key(key):
                user_friends.update({key : data[key]})
            else:
                pass
    f.close()
    f = gzip.open("../social_graph/user_friends_09_2.json.gz", "r")
    for line in f:
        data = cjson.decode(line)
        for key in data:
            if not user_friends.has_key(key):
                user_friends.update({key : data[key]})
            else:
                pass
    f.close()
    f = gzip.open("../social_graph/user_friends_09_total.json.gz","w")
    json.dump(user_friends, f)
    f.close()
Example #7
 def mqCallback(self, channel, method_frame, header_frame, body):
     try:
         if not self.zk.is_proxy_master():
             return
         # master's business
         data_dict = cjson.decode(body)
         # ** MUST ** ack
         channel.basic_ack(method_frame.delivery_tag)
         utils.log(utils.cur(), body, data_dict)
         if not isinstance(data_dict, dict):
             return
         for db, forbid in data_dict.iteritems():
             if not forbid[Forbid.KEY_TYPE] in (Forbid.FORBID_WORKING, Forbid.FORBID_FOREVER):
                 return
             forbid[Forbid.KEY_START] = time.time()
             path = os.path.join(ZKConf.ZK_PATH_FORBID, db)
             orig = self.get_path(path)
             if orig is False:
                 self.zk.mknode(path, cjson.encode(forbid))
             else:
                 old = cjson.decode(orig)
                 if (
                     old[Forbid.KEY_TYPE] == forbid[Forbid.KEY_TYPE]
                     and old[Forbid.KEY_TYPE] == Forbid.FORBID_WORKING
                     and old[Forbid.KEY_START] + old[Forbid.KEY_DURATION] > time.time()
                 ):
                     utils.log(utils.cur(), "still forbidding")
                 else:
                     utils.log(utils.cur(), "change forbid")
                     # change /database/forbid/db
                     self.forbidinfo[db] = forbid
                     self.zk.set(path, cjson.encode(forbid))
     except Exception, err:
         utils.err(utils.cur(), err)
Example #8
 def _eval_response(self,r):
     #print '_eval_response r',r
     try:
         json = decode(r)
     except DecodeError:
         # This is for the case of, e.g.,
         # {"status":"Ok.","response":{1:"I1\n."}}
         # where cjson won't let us use 1 as a key.
         import re
         p = re.compile('\{(\d+)\:')
         for m in p.finditer(r):
             r = r.replace(m.group(0),'{"n%s":' % m.group(1))
         #print '_eval_response r modified',r
         json = decode(r)
     if json.get('status') == 'Ok.':
         d = {}
         pairs = json.get('response',"NULL.")
         #if not pairs in ["NULL.","PONG."]:
         if isinstance(pairs,dict):
             for k,v in pairs.items():
                 d[self._if_numeric(k)] = self._loads(v)
         if d.keys():
             json['response'] = d
         else:
             json['response'] = pairs
     return json
Example #9
def main():
    n_sample=0
    filteredData = open(FILTERED_DATA_PATH_JSON,'r')
    for line in filteredData: #parsing tweets data file to generate a graph by adding edges
        data = cjson.decode(line)
        n_sample+=1
        if data["postId"] not in  GlobalPostUserDict:
            GlobalPostUserDict[data["postId"]]=data["ownerUserId"]
        accumulateTags(data["tags"])
    
    n_feature=len(GlobalTagList)
    
    global clusterData
    clusterData=np.zeros(shape=(n_sample,n_feature), dtype=float)  
        
    labels=[]  
    sampleId=0
    filteredData = open(FILTERED_DATA_PATH_JSON,'r')
    for line in filteredData: #parsing tweets data file to generate a graph by adding edges
        data = cjson.decode(line)
        labels.append(data["postId"])
        processrecord(data,sampleId)
        sampleId+=1
        
    clustersize=100
    createKcluster(clusterData,clustersize,len(labels),labels)    
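
main relies on accumulateTags, processrecord and createKcluster helpers that are not shown. A minimal sketch of the first two, assuming GlobalTagList is a list of distinct tags and clusterData holds one-hot tag features per post (both assumptions fit the len() and indexing above):

def accumulateTags(tags):
    # Hypothetical helper: remember each distinct tag once, in first-seen order.
    for tag in tags:
        if tag not in GlobalTagList:
            GlobalTagList.append(tag)

def processrecord(data, sampleId):
    # Hypothetical helper: set the feature column of every tag on this post.
    for tag in data["tags"]:
        clusterData[sampleId][GlobalTagList.index(tag)] = 1.0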
Example #10
 def prediction_by_major_topic(self,rulelistfile,ground_truth_file,user_tag_dict_file,tfd,cosine):
     #def prediction_by_strong_rule(self,rulelistfile,ground_truth_file,user_tag_dict_file):
     #order the rules of existing tag in decreasing order in one list.
     user_tag_dict={}
     ground_truth=defaultdict(dict)
     rule_by=defaultdict(list)
     rule_by1=defaultdict(dict)
     #candidate=[]
     for line in open(user_tag_dict_file,'r'):
         line=cjson.decode(line)
         user_tag_dict[line[0]]=line[1]
     for line in open(ground_truth_file,'r'):
         line=cjson.decode(line)
         ground_truth[line[0]]=line[1]
     for line in open(rulelistfile,'r'):
         line=cjson.decode(line)
         if line[2]>0.5:
             rule_by[line[0]].append(line)
         
         rule_by1[line[0]][line[1]]=line[2]
     tagfile=open(tfd,'r')
     tfd=cjson.decode(tagfile.readline())
     cosine=open(cosine,'r')
     cosine=cjson.decode(cosine.readline())
     #    if line[0] in ground_truth and line[1] not in groud_truth and line[2]>0.5:
     #    rule_by[line[0]].append(line)
     #candidate.append(line[1])
     #for key in rule_by:
     #    rule_by[key]=sorted(rule_by[key],key=itemgetter(2),reverse=1)
     #print rule_by[1]
     cnt=0
     cntt=0
     cntp=0
     for user,utd in user_tag_dict.iteritems():
         candidate=[]
         for key in utd:
             for rule in rule_by[key]:
                 if rule[1] not in utd:
                     candidate.append([rule[0],rule[1]])
         canscore=[]
         for can in candidate:
             score=0
             for item in utd:
                 cosinev=cosine.get(can[1]+'_'+item,0)
                 cosineq=cosine.get(item+'_'+can[1],0)
                 score+=(cosinev+cosineq)*log(utd[item])
             #lets use  sum(sim(k,j)*log(f(j)))
             canscore.append([can[1],score])
             #print canscore
             #print ground_truth[user]
         if canscore!=[]:
             tag=sorted(canscore,key=itemgetter(1),reverse=1)[0]
             #print tag 
             cnt+=1
         
             if tag[0] in ground_truth[user]:
                 cntt+=1
                 cntp+=ground_truth[user][tag[0]]
     print cnt,cntt,cntp
Example #11
def decode(data):
    if not data:
        return data
    # return json.loads(data)
    try:
        return cjson.decode(data, encoding='utf-8')
    except cjson.DecodeError:
        return cjson.decode(data)
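
A usage sketch with hypothetical payloads; the falsy guard means empty input is handed back unchanged:

print decode('{"a": 1}')   # -> {'a': 1}
print decode('')           # -> '' (returned as-is)
print decode(None)         # -> None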
Example #12
def thrash_cjson():
    for obj, tns, json, msh in TESTS:
        if TEST_DUMP_ONLY:
            cjson.encode(obj)
        elif TEST_LOAD_ONLY:
            assert cjson.decode(json) == obj
        else:
            assert cjson.decode(cjson.encode(obj)) == obj
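
TESTS is not shown; the loop implies 4-tuples whose first and third items are a Python object and its JSON text (the tns and msh slots are unused here). A hypothetical entry:

TESTS = [
    # (obj, tns, json, msh) -- only obj and json matter to thrash_cjson
    ({"a": [1, 2]}, None, '{"a": [1, 2]}', None),
]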
Example #13
def webPlaylist():
    """
        Choose the next song, given a current song and a list of forbidden songs
    """
    seeds = json.decode(flask.request.form.get("seeds", ""))
    not_list = json.decode(flask.request.form.get("not_list", ""))

    return json.encode(playlist.nextSong(seeds, not_list))
Example #14
 def __call__(self, value=None):
     if value is None or value == '':
         raise ValidationError(message=self.message, code=self.code)
     import cjson
     try:
         cjson.decode(value)
     except:
         raise ValidationError(message=self.message, code=self.code)
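
A usage sketch, assuming this __call__ sits on a Django-style validator class (the class name and constructor arguments here are hypothetical):

validate_json = JSONValidator(message='Enter valid JSON.', code='invalid')
validate_json('{"a": 1}')    # passes silently
validate_json('not json')    # raises ValidationError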
Example #15
 def iterateJsonFromFile(file, remove_params_dict=False):
     for line in open(file): 
         try:
             if not remove_params_dict: yield cjson.decode(line)
             else:
                 data = cjson.decode(line)
                 if 'PARAMS_DICT' not in data: yield data
         except: pass
Example #16
def main(prefix, date):
    json = open(os.path.join(json_path, '%s_%s.json' % (prefix, date)))
    pos = open(os.path.join(json_path, '%s_positions.json' % prefix))

    timestamp, nodes, ways, rels, tags = cjson.decode(json.readline())
    positions = cjson.decode(pos.readline())

    make_pickles(prefix, date, nodes, ways, rels, tags, positions)
Example #17
def webPlaylist():
    '''
        Choose the next song, given a current song and a list of forbidden songs
    '''
    seeds       = json.decode(flask.request.form.get('seeds', ''))
    not_list    = json.decode(flask.request.form.get('not_list', ''))

    return json.encode(playlist.nextSong(seeds, not_list))
Example #18
    def prediction_by_strong_rule_of_most_voted_tag(self,rulelistfile,ground_truth_file,user_tag_dict_file):
        #order the rules for each tag in a list of list
        user_tag_dict={}
        ground_truth=defaultdict(dict)
        rule_by_tag=defaultdict(list)
        for line in open(user_tag_dict_file,'r'):
            line=cjson.decode(line)
            user_tag_dict[line[0]]=line[1]
        for line in open(ground_truth_file,'r'):
            line=cjson.decode(line)
            ground_truth[line[0]]=line[1]
        for line in open(rulelistfile,'r'):
            line=cjson.decode(line)
            if line[2]>0.5:
                rule_by_tag[line[0]].append(line)
        for key in rule_by_tag:
            rule_by_tag[key]=sorted(rule_by_tag[key],key=itemgetter(2),reverse=1)
        cnt=0
        cntt=0
        cntp=0
        for user,utd in user_tag_dict.iteritems():
            #print user
            #if user==34272713:
            #    print utd 
            utd1=sorted(utd.items(),key=itemgetter(1),reverse=1)
            found=False
            while found==False:
                for item in utd1:
                    if item[0] in rule_by_tag:
                        rules=rule_by_tag[item[0]]
                        for rule in rules:
                            if rule[1] not in utd:
                                #print user,rule[1]
                                cnt+=1
                                if rule[1] in ground_truth[user]:
                                    cntt+=1
                                    cntp+=ground_truth[user][rule[1]]
                                    #if user=='34272713':

                                    #print user,rule
                                else:
                                    #if user==61521908:
                                      #  print utd
                                        
                                    
                                    #print user,rule,ground_truth[user]
                                    pass
                                found=True
                                break
                            else:
                                continue
                    if found==True:
                        break
                    else:
                        continue
                break
        print cnt,cntt,cntp
Example #19
    def hasDifference(self, account, local_json, remote_json, icloud=False):
        changed_keys = set()
        BlinkLogger().log_debug(u"Computing differences from iCloud for %s" % account.id)
        try:
            local_data = cjson.decode(local_json)
        except TypeError:
            return True

        try:
            remote_data = cjson.decode(remote_json)
        except TypeError:
            return True

        differences = DictDiffer(local_data, remote_data)

        diffs = 0
        for e in differences.changed():
            if e in self.skip_settings:
                continue
            BlinkLogger().log_debug('Setting %s has changed' % e)
            changed_keys.add(e)
            diffs += 1

        for e in differences.added():
            if e in self.skip_settings:
                continue

            BlinkLogger().log_debug('Setting %s has been added' % e)

            if not local_data.has_key(e):
                BlinkLogger().log_debug('Remote added')
            elif not remote_data.has_key(e):
                BlinkLogger().log_debug('Local added')

            changed_keys.add(e)
            diffs += 1

        for e in differences.removed():
            if e in self.skip_settings:
                continue

            BlinkLogger().log_debug('Setting %s has been removed' % e)

            if not local_data.has_key(e):
                BlinkLogger().log_debug('Local removed')

            if not remote_data.has_key(e):
                BlinkLogger().log_debug('Remote removed')

            changed_keys.add(e)
            diffs += 1

        if diffs and icloud:
            self.notification_center.post_notification("iCloudStorageDidChange", sender=self, data=NotificationData(account=account.id, changed_keys=changed_keys))

        return bool(diffs)
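
hasDifference leans on a DictDiffer helper that is not part of the snippet. A minimal sketch of the usual recipe, assuming plain dict inputs (not necessarily the exact class this code imports):

class DictDiffer(object):
    # Reports which keys were added, removed or changed between two dicts.
    def __init__(self, current, past):
        self.current, self.past = current, past
        self.current_keys, self.past_keys = set(current), set(past)
        self.intersect = self.current_keys & self.past_keys

    def added(self):
        return self.current_keys - self.intersect

    def removed(self):
        return self.past_keys - self.intersect

    def changed(self):
        return set(k for k in self.intersect if self.current[k] != self.past[k])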
Example #20
def construct_directed_graph_from_rules(rules_file,tagfreqdict):
    graph=defaultdict(dict)
    tagfreqdict=cjson.decode(tagfreqdict.readline())
    for rule in rules_file:
        rule = cjson.decode(rule)
        if rule[2]>0.3:
        	graph[rule[1]][rule[0]]=-rule[2]
    for tag in tagfreqdict:
        graph['ROOT'][tag]=-0.1
    return graph
Example #21
def store_document(storage, index, document):
    cluster_index = str(int(index[0:2], 16))
    # to check the validity of the document on write
    if CHECK_VALID_JSON_DOCUMENT:
        json.decode(document)
    fh = open(os.path.join(storage, cluster_index, index), "wb")
    # simple os lock system
    #fcntl.lockf(fh, fcntl.LOCK_EX)
    fh.write(document)
    fh.close()
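
The first two hex digits of the index select one of 256 cluster directories, named by their decimal value. For example (hypothetical index):

# index = "a3f0..."  ->  int("a3", 16) == 163,
# so the document is written to <storage>/163/a3f0...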
Example #22
    def test_join_pool(self):

        req = join_pool_request('127.0.1.1:8080', '127.0.1.1:8081')
        data = urllib2.urlopen(req)
        self.assertEqual(
            cjson.decode(data.read()),
            [
                {"index": 0, "state": "pooling", "address": "127.0.1.1:8081"},
                {"index": 1, "state": "pooling", "address": "127.0.1.1:8080"}
            ]
        )

        req = get_pool_request('127.0.0.1:8081')
        data = urllib2.urlopen(req)
        self.assertEqual(
            cjson.decode(data.read()),
            [
                {"index": 0, "state": "pooling", "address": "127.0.1.1:8081"},
                {"index": 1, "state": "pooling", "address": "127.0.1.1:8080"}
            ]
        )
        
        req = get_pool_request('127.0.0.1:8080')
        data = urllib2.urlopen(req)
        self.assertEqual(
            cjson.decode(data.read()),
            [
                {"index": 0, "state": "pooling", "address": "127.0.1.1:8081"},
                {"index": 1, "state": "pooling", "address": "127.0.1.1:8080"}
            ]
        )


        req = join_pool_request('127.0.1.1:8082', '127.0.1.1:8081')
        data = urllib2.urlopen(req)
        
        self.assertEqual(
            cjson.decode(data.read()),
            [
                {"index": 0, "state": "pooling", "address": "127.0.1.1:8081"},
                {"index": 1, "state": "pooling", "address": "127.0.1.1:8080"},
                {"index": 2, "state": "pooling", "address": "127.0.1.1:8082"}
            ]
        )

        req = get_pool_request('127.0.0.1:8081')
        data = urllib2.urlopen(req)
        self.assertEqual(
            cjson.decode(data.read()),
            [
                {"index": 0, "state": "pooling", "address": "127.0.1.1:8081"},
                {"index": 1, "state": "pooling", "address": "127.0.1.1:8080"},
                {"index": 2, "state": "pooling", "address": "127.0.1.1:8082"}
            ]
        )
Example #23
	def test_30_getinfos(self):
		# CA
		(resp, data) = self.client.request('GET',	'/openssl_certificateinfos',
			{'name': 'catest1.crt'})
		self.assertEqual(resp.status, 200)

		try:
			data = cjson.decode(data)
		except Exception:
			self.fail('cannot decode cjson data')

		self.assertEqual(data['CA']    , 1)
		self.assertEqual(data['length'], 1024)

		# self-signed certificate
		(resp, data) = self.client.request('GET',	'/openssl_certificateinfos',
			{'name': 'usercert1.pem'})
		self.assertEqual(resp.status, 200)

		try:
			data = cjson.decode(data)
		except Exception:
			self.fail('cannot decode cjson data')

		for k in ('sn','CA','length','fingerprint','validity-start','validity-end','subject','issuer'):
			self.assertTrue(k in data)

		for k in ('C','CN','L','O','OU','ST','emailAddress'):
			self.assertTrue(k in data['subject'])
			self.assertTrue(k in data['issuer'])

		self.assertEqual(data['CA']    , 0)
		self.assertEqual(data['length'], 432)
		self.assertEqual(data['subject']['CN'], 'wiki.xivo.com')

		# CA-signed certificate
		(resp, data) = self.client.request('GET',	'/openssl_certificateinfos',
			{'name': 'usercert4.crt'})
		self.assertEqual(resp.status, 200)

		try:
			data = cjson.decode(data)
		except Exception:
			self.fail('cannot decode cjson data')

		self.assertEqual(data['CA']    , 0)
		self.assertNotEqual(data['subject']['emailAddress'], data['issuer']['emailAddress'])


		# limit cases
		# 	. file does not exist
		(resp, data) = self.client.request('GET',	'/openssl_certificateinfos',
			{'name': 'foo'})
		self.assertEqual(resp.status, 404)
Example #24
def measureDecoderThroughput(data):
    json=cjson.encode(data)
    bytes=0
    st=time.time()
    cnt=0
    while True:
        dt=time.time()-st
        if dt>=0.5 and cnt>9: break
        cjson.decode(json)
        bytes+=len(json)
        cnt+=1
    return int(math.floor(bytes/dt/1024.0+0.5))
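
A usage sketch (hypothetical sample object); the return value is decoder throughput in KiB per second:

sample = {"users": range(100), "ok": True, "name": "benchmark"}
print measureDecoderThroughput(sample), "KiB/s"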
Example #25
 def testDecoderExtension(self):
     re_date=re.compile('^new\sDate\(Date\.UTC\(.*?\)\)')
     def dateDecoder(json,idx):
         json=json[idx:]
         m=re_date.match(json)
         if not m: raise ValueError('cannot parse JSON string as Date object: %s' % json[idx:])
         args=cjson.decode('[%s]'%json[18:m.end()-2])
         dt=datetime.date(*args)
         return (dt,m.end())
     self.assertEqual(cjson.decode('[1, new Date(Date.UTC(2007,1,2)), 2]', extension=dateDecoder), [1,datetime.date(2007,1,2),2])
     self.assertEqual(cjson.decode('[1, new Date(Date.UTC( 2007, 1 , 2 )) , 2]', extension=dateDecoder), [1,datetime.date(2007,1,2),2])
     self.assertRaises(cjson.DecodeError, lambda: cjson.decode('1', extension=0))
Example #26
def get_field_data_form_json(json_data,field_list=[]):
    if len(field_list) == 0:
        return cjson.decode(json_data)
    else:
        try:
            data = cjson.decode(json_data)
        except:
            data = {}
        temp_data = {}
        for i in data:
            if i in field_list:
                temp_data[i] = data[i]
        return temp_data
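
A usage sketch with a hypothetical payload; only the requested fields survive the filter:

raw = '{"id": 7, "name": "x", "secret": "y"}'
print get_field_data_form_json(raw, ['id', 'name'])   # -> {'id': 7, 'name': 'x'}
print get_field_data_form_json(raw)                   # full decode, no filtering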
Example #27
 def GET(self, md5_sum):
   try:
     cat_gen_data = models.CatalogGenData.selectBy(md5_sum=md5_sum).getOne()
   except sqlobject.main.SQLObjectNotFound:
     raise web.notfound("RestSvr4CatalogData for %r not found" % md5_sum)
   simple_data = {
       'deps': cjson.decode(cat_gen_data.deps),
       'i_deps': cjson.decode(cat_gen_data.i_deps),
       'pkginfo_name': cat_gen_data.pkginfo_name,
       'pkgname': cat_gen_data.pkgname,
   }
   response = cjson.encode(simple_data)
   web.header('Content-Length', str(len(response)))
   return response
Example #28
	def testMultikeyGet(self):
		"""Make a temp view."""
		be1 = CouchStub()
		be1_request = be1.expect_POST("/funstuff0/_all_docs?include_docs=true")
		be1_request.reply(200, dict(
			total_rows=2,
			offset=0,
			rows=[
				{"id":"a", "key":"a", "value": {"rev":"2"},"doc":"b"},
				{"id":"c", "key":"c", "value": {"rev":"3"},"doc":"d"}
			]))
		be1.listen("localhost", 23456)

		be2 = CouchStub()
		be2_request = be2.expect_POST("/funstuff1/_all_docs?include_docs=true")
		be2_request.reply(200, dict(
			total_rows=2,
			offset=0,
			rows=[
				{"id":"b", "key":"b", "value": {"rev":"7"},"doc":"z"},
				{"id":"y", "key":"y", "value": {"rev":"9"},"doc":"w"}
			]))
		be2.listen("localhost", 34567)

		resp = post("http://localhost:22008/funstuff/_all_docs?include_docs=true", body={"keys":["a","c","x","y"]})

		be1.verify()
		be2.verify()
		
		be1_post = cjson.decode(be1_request.input_body)
		be2_post = cjson.decode(be2_request.input_body)

		def lounge_hash(x):
			crc = zlib.crc32(x,0)
			return (crc >> 16)&0x7fff

		keys1 = be1_post['keys']
		keys2 = be2_post['keys']
		keys = {0:keys1, 1:keys2}
		num_shards = 2
		for v, k in keys.items():
			for key in k:
				self.assertEqual(lounge_hash(key) % num_shards, int(v))

		self.assertEqual(resp.body["total_rows"], 4)
		self.assertEqual(resp.body["offset"], 0)
		self.assertEqual(len(resp.body["rows"]), 4)
		rows = [x["key"] for x in resp.body["rows"]]
		rows.sort()
		self.assertEqual(rows, ["a","b","c","y"])
Example #29
    def test_01_dhcp(self):
        (resp, data) = self.client.view(self.obj, 0)
        self.assertEqual(resp.status, 200)
        
        data = cjson.decode(data)
#        pprint.pprint(data)
        
        # EDIT
        with open('xivojson/dhcp.json') as f:
            content = cjson.decode(f.read())
            
#        print content
        (resp, data) = self.client.edit(self.obj, content)
        self.assertEqual(resp.status, 200)
Example #30
def sjson_parser(source, logger=None):
    """
    SJSON parser based on cjson module. Reads data line by line in
    streaming json format.
    """
    obj = None
    for line in source:
      if not obj:
        obj = cjson.decode(line + "]}")
      elif line.startswith("]"):
        break
      else:
        o = cjson.decode(line[1:])
        yield o
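
The streaming layout sjson_parser expects looks like this (hypothetical keys):

# {"header": [     <- first line; "]}" is appended so the preamble decodes
# ,{"id": 1}       <- one record per line, the leading comma is stripped
# ,{"id": 2}
# ]}               <- a line starting with "]" terminates the stream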
Example #31
def in_box_us(lat, lon):
    if lat>24.52 and lat<49.38 and lon<-66.95 and lon>-124.77:
        return 1
    elif lat>54.66 and lat<71.83 and ((lon<-130 and lon>-180) or (lon>173 and lon<180)):
        return 1
    else:
        return 0
#for line in open(infile,'r'):
#    line1=cjson.decode(line)
#    #if 'entertain'in line1['tag'] or 'entertainment' in line1['tag']:
#    lat_u=line1['user_lat']
#    lon_u=line1['user_lng']
#    lat_c=line1['list_creator_lat']
#    lon_c=line1['list_creator_lng']
#    if in_box_us(lat_u,lon_u)==1 or in_box_us(lat_c,lon_c)==1:
#        outfile1.write(line)


for line in open(infile,'r'):
    line1=cjson.decode(line)
#    if 'tech' in line1['tag']  or 'techy' in line1['tag'] or 'technology' in line1['tag']:
    #if 'entertain' in line1['tag']  or 'entertainment' in line1['tag'] or 'entertaining' in line1['tag'] or 'entertainer' in line1['tag']
    if 'nutrition' in line1['tag']:
        lat_u=line1['user_lat']
        lon_u=line1['user_lng']
        lat_c=line1['list_creator_lat']
        lon_c=line1['list_creator_lng']
        if in_box_us(lat_u,lon_u)==1 or in_box_us(lat_c,lon_c)==1:
            outfile1.write(line)

           
Example #32
def unpack(raw):
    # return decode(decompress(raw))
    return decode(raw)
Example #33
import cjson
infile1 = '../backbone/lc/gpd_hou'
infile2 = '../backbone/graph/gpd_hou_cost'
outfile = open('./hou', 'w')
nm = open(infile1, 'r')
lc = open(infile2, 'r')
nm = cjson.decode(nm.readline())
lc = cjson.decode(lc.readline())
for key, value in lc.iteritems():
    if key in nm.keys():
        if nm[key][0] != value[0]:
            print >> outfile, key, '-', value[0], '-', nm[key][0]
    else:
        print >> outfile, '---', key, '-', value
Example #34
def readUsers():
    f = open("users.passwd")
    users = f.read()
    f.close()
    users = cjson.decode(users)
    return users
Example #35
#remove tag that are not in the tag_user_dict
import sys
import cjson

argv = sys.argv
#inputfilename unique_tag outputfilename
infile1 = open(argv[1], 'r')
infile2 = open(argv[2], 'r')
tag_f = cjson.decode(infile2.readline())
tag_f = tag_f.keys()
outfile = open(argv[3], 'w')
for line in infile1:
    line = line.split('\t')[1]
    line = cjson.decode(line)
    dictt = {}
    for key, value in line[1].iteritems():
        if key in tag_f and value > 1:
            dictt[key] = value
    outfile.write(cjson.encode([line[0], dictt]) + '\n')
Example #36


if __name__ == "__main__":
    try:
        filename = '../../centrality/lc/rulelist_tfidf1-hou'#sys.argv[1]
        rules_file=open(filename,'r')
        tagfreqdict=open('../lc/tag_freq_dict_hou','r')

        root = 'ROOT' #sys.argv[2]
    except IndexError:
        sys.stderr.write('no input and/or root node specified\n')
        sys.stderr.write('usage: python edmonds.py <file> <root>\n')
        sys.exit(1)
    G=construct_directed_graph_from_rules(rules_file,tagfreqdict)   
    h = edmonds(root,G)
    outfile=open('test','w')
    rootdict={}
    for s in h:
        for t in h[s]:
            rootdict[t]=s
            print>>outfile, "%s-%s" % (s,t)
    infile1=open('../../centrality/lc/gpd_hou','r')
    rootdict1=cjson.decode(infile1.readline())
    for key,value in rootdict1.iteritems():
        if rootdict[key]!=value[0]:
            print key,value, rootdict[key]
    


Example #37
#   cjson: 6.57
#   json: 11.86
#   simplejson: 14.98
#   cPickle: 30.98
# +- 0.5 seconds
try:
    import ujson
    _json_decode = ujson.decode
    _json_encode = ujson.encode
except ImportError:
    try:
        import cjson
        # You can find the patch for cjson here: http://vazor.com/media/python-cjson-1.0.5.diff
        # An unpatched version (1.0.5 is the latest as of writing)
        # won't decode that correctly and thus trigger an AssertionError here:
        assert '/' == cjson.decode('"\/"')
        _json_decode = cjson.decode
        _json_encode = cjson.encode
    except (AssertionError, ImportError):
        try:
            # But json is slower? We will override that with simplejson later. ;-)
            import json
            assert hasattr(json, "loads") and hasattr(json, "dumps")
            _json_decode = json.loads
            _json_encode = json.dumps
            has_python_json = True
        except (AssertionError, ImportError):
            has_python_json = False

        try:
            import simplejson
Example #38
 def _get_dict(self):
     return decode(self._get_json_string())
Example #39
import urllib2
import cjson
#http://nominatim.openstreetmap.org/reverse?format=xml&lat=52.5487429714954&lon=-1.81602098644987&zoom=18&addressdetails=1
#import requests
#r=requests.post(std)
#for line in r.iter_lines():
#    line=cjson.decode(line)
#    print line
#    print line.keys()
#    print line['address']['county']
infile = '/spare/wei/folk/listings_us'#list_creator_user_location_nonsingular_100'
outfile='/spare/wei/folk/list_creator_user_addr_us'
web='http://nominatim.openstreetmap.org/reverse?format=json'
web='http://open.mapquestapi.com/nominatim/v1/reverse.php?format=json'
extra='&zoom=%2018&addressdetails=1'
outfile=open(outfile,'w')
for line in open(infile,'r'):
    line=cjson.decode(line)  
    #print line['tag']
    if 'tech' in line['tag'] or 'technology' in line['tag']:
#        print line['tag']
        lat_1=line['list_creator_lat']
        lon_1=line['list_creator_lng']
        lat_2=line['user_lat']
        lon_2=line['user_lng']
        creator_lat='&lat='+str(lat_1)
        creator_lon='&lon='+str(lon_1)
        user_lat='&lat='+str(lat_2)
        user_lon='&lon='+str(lon_2)
        try:
            creator_url=web+creator_lat+creator_lon#+extra
#            print creator_url
            creator_info = urllib2.urlopen(creator_url)
Example #40
def retrieve_graph_from_dbpedia(term):
    assert ONLINE_ENABLED
    logger.info('online access - DBpedia: {term}'.format(term=unicode(term)))
    term_utf = term.encode('utf-8')
    term_url = quote_plus(term_utf, safe=str("/:#,()'"))
    #print '---'
    #print 'term_url', term_url
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    #query = """
    #    SELECT ?p ?o
    #    WHERE {{ <{term_url}> ?p ?o }}
    #""".format(term_url=term_url)
    query = """
        SELECT ?p ?o
        WHERE {{
            <{term_url}> ?p ?o
            FILTER( STRSTARTS(STR(?p), "{foaf}")
                || STRSTARTS(STR(?p), "{rdf}")
                || STRSTARTS(STR(?p), "{rdfs}")
                || STRSTARTS(STR(?p), "{dcterms}")
                || STRSTARTS(STR(?p), "{ontology}"))
            FILTER (isURI(?o) || langMatches(lang(?o), "EN"))
        }}
    """.format(term_url=term_url,
            foaf=unicode(FOAF),
            rdf=unicode(RDF),
            rdfs=unicode(RDFS),
            dcterms=unicode(DCTERMS),
            ontology=unicode(ONTOLOGY))

    sparql.setQuery(query.encode('utf-8'))
    sparql.setReturnFormat(JSON)
    try:
        results = sparql.query()
        # workaround for "Invalid \escape" error which can be raised by
        # convert()
        body = results.response.read()
        results = cjson.decode(body)
    except HTTPError as exc:
        # can occur if DBpedia is under maintenance (quite often)
        logger.error('Getting graph for {term} failed; {message}; {excType}'
            .format(term=term, message=exc.message, excType=unicode(type(exc))))
        return None

    # create graph and bind relevant namespaces
    graph = Graph()
    for prefix, namespace in NAMESPACES_DICT.items():
        graph.bind(prefix, namespace)

    LITERAL_MAX_LENGTH = 600
    for result in results["results"]["bindings"]:
        try:
            p = URIRef(result['p']['value'])
            # filter wikiPageRevisionID, wikiPageExternalLike etc.
            if p.startswith(ONTOLOGY['wiki']):
                continue
            if result['o']['type'] == 'uri':
                o = URIRef(result['o']['value'])
            else:
                o = Literal(result['o']['value'])
                # if object is too long (e.g. abstract, ignore it)
                if len(o) > LITERAL_MAX_LENGTH:
                    continue
            graph.add((term, p, o))
            #print type(p), p
            #print type(o), o
            #print '*'
        except KeyError:
            continue

    # check if the graph is not empty
    if not graph:
        logger.warning('Retrieved empty graph for ' + unicode(term))

    return graph
Example #41
#get tag_freq_dict for training data from user_tag_dict
import sys
import cjson
infilename = str(sys.argv[1])
outfilename = str(sys.argv[2])
#unique=str(sys.argv[3])
#uniquetag=open(unique,'w')
outfile = open(outfilename, 'w')
infile = open(infilename, 'r')
print infilename, outfilename
#unique_tag={}
tag_freq_dict = {}
for line in infile:
    #    print line
    #line = line.split('\t')
    tagdict = cjson.decode(line)[1]
    for key, value in tagdict.iteritems():
        tag_freq_dict[key] = tag_freq_dict.get(key, 0) + 1
    #if len(tagdict[1])<5 or tagdict[-1]<9:
    #    continue
    #else:
    #unique_tag[tagdict[0]]=tagdict[-1]
outfile.write(cjson.encode(tag_freq_dict) + '\n')
#uniquetag.write(cjson.encode(unique_tag)+'\n')
Example #42
    def main(self):
        while not self.finished():
            if self.dataReady("inbox"):
                channel = self.recv("inbox")
                sleeper.sleep(
                    1)  # Temporary delay to ensure not hammering /programmes

                # Setup in case of URL errors later
                data = None

                # Define URLs for getting schedule data and DVB bridge information
                # By BBC convention, schedule info runs to 5am the next day
                if datetime.utcnow().hour < 5:
                    scheduleurl = "http://www.bbc.co.uk" + self.channels[
                        channel][1] + "/" + strftime(
                            "%Y/%m/%d", gmtime(time() - 86400)) + ".json"
                else:
                    scheduleurl = "http://www.bbc.co.uk" + self.channels[
                        channel][1] + "/" + strftime("%Y/%m/%d", gmtime(
                            time())) + ".json"
                #syncschedurl = "http://beta.kamaelia.org:8082/dvb-bridge?command=channel&args=" + urllib.quote(self.channels[channel][0])
                #synctimeurl = "http://beta.kamaelia.org:8082/dvb-bridge?command=time"
                syncschedurl = "http://10.92.164.147:8082/dvb-bridge?command=channel&args=" + urllib.quote(
                    self.channels[channel][0])
                synctimeurl = "http://10.92.164.147:8082/dvb-bridge?command=time"

                # Grab SyncTV time data to work out the offset between local (NTP) and BBC time (roughly)
                self.send([synctimeurl], "dataout")
                while not self.dataReady("datain"):
                    self.pause()
                    yield 1
                recvdata = self.recv("datain")
                if recvdata[0] == "OK":
                    content = recvdata[1]
                else:
                    content = None

                # Work out time difference between local time and BBC time
                if content != None:
                    try:
                        decodedcontent = cjson.decode(content)
                        if decodedcontent[0] == "OK":
                            difference = time() - decodedcontent[2]['time']
                    except cjson.DecodeError, e:
                        print "cjson.DecodeError:", e.message

                if 'difference' in locals():
                    # Grab actual programme start time from DVB bridge channel page
                    self.send([syncschedurl], "dataout")
                    while not self.dataReady("datain"):
                        self.pause()
                        yield 1
                    recvdata = self.recv("datain")
                    if recvdata[0] == "OK":
                        content = recvdata[1]
                    else:
                        content = None

                    if content != None:
                        try:
                            decodedcontent = cjson.decode(content)
                            if decodedcontent[0] == "OK":
                                proginfo = decodedcontent[2]['info']
                        except cjson.DecodeError, e:
                            print "cjson.DecodeError:", e.message

                # Grab BBC schedule data for given channel
                self.send([scheduleurl], "dataout")
                while not self.dataReady("datain"):
                    self.pause()
                    yield 1
                recvdata = self.recv("datain")
                if recvdata[0] == "OK":
                    content = recvdata[1]
                else:
                    content = None

                # Read and decode schedule
                if content != None:
                    try:
                        decodedcontent = cjson.decode(content)
                    except cjson.DecodeError, e:
                        print "cjson.DecodeError:", e.message

                    if 'proginfo' in locals():
                        showdate = proginfo['NOW']['startdate']
                        showtime = proginfo['NOW']['starttime']
                        actualstart = proginfo['changed']
                        showdatetime = datetime.strptime(
                            str(showdate[0]) + "-" + str(showdate[1]) + "-" +
                            str(showdate[2]) + " " + str(showtime[0]) + ":" +
                            str(showtime[1]) + ":" + str(showtime[2]),
                            "%Y-%m-%d %H:%M:%S")

                        # SyncTV (DVB Bridge) produced data - let's trust that
                        if 'decodedcontent' in locals():
                            for programme in decodedcontent['schedule']['day'][
                                    'broadcasts']:
                                starttime = parse(programme['start'])
                                gmt = pytz.timezone("GMT")
                                starttime = starttime.astimezone(gmt)
                                starttime = starttime.replace(tzinfo=None)
                                # Attempt to identify which DVB bridge programme corresponds to the /programmes schedule to get PID
                                if showdatetime == starttime or (
                                        showdatetime + timedelta(minutes=1)
                                        == starttime and string.lower(
                                            proginfo['NOW']['name'])
                                        == string.lower(
                                            programme['programme']
                                            ['display_titles']['title'])
                                ) or (showdatetime - timedelta(minutes=1)
                                      == starttime
                                      and string.lower(proginfo['NOW']['name'])
                                      == string.lower(
                                          programme['programme']
                                          ['display_titles']['title'])):
                                    duration = (
                                        proginfo['NOW']['duration'][0] * 60 *
                                        60
                                    ) + (proginfo['NOW']['duration'][1] *
                                         60) + proginfo['NOW']['duration'][2]
                                    progdate = parse(programme['start'])
                                    tz = progdate.tzinfo
                                    utcoffset = datetime.strptime(
                                        str(tz.utcoffset(progdate)),
                                        "%H:%M:%S")
                                    utcoffset = utcoffset.hour * 60 * 60
                                    # Something's not right with the code below #TODO #FIXME
                                    timestamp = sleeper.mktime(
                                        showdatetime.timetuple()) + utcoffset
                                    if 'difference' in locals():
                                        offset = (timestamp -
                                                  actualstart) - difference
                                    else:
                                        offset = timestamp - actualstart
                                    pid = programme['programme']['pid']
                                    title = programme['programme'][
                                        'display_titles']['title']
                                    # Fix for unicode errors caused by some /programmes titles
                                    if (not isinstance(title, str)) and (
                                            not isinstance(title, unicode)):
                                        title = str(title)
                                    print[
                                        pid, title, offset, duration,
                                        str(showdatetime) + " GMT", utcoffset
                                    ]
                                    data = [
                                        pid, title, offset, duration,
                                        timestamp, utcoffset
                                    ]

                    else:
                        # Couldn't use the DVB Bridge, so work out what's on NOW here
                        utcdatetime = datetime.now()

                        # Analyse schedule
                        if 'decodedcontent' in locals():
                            for programme in decodedcontent['schedule']['day'][
                                    'broadcasts']:
                                starttime = parse(programme['start'])
                                starttime = starttime.replace(tzinfo=None)
                                endtime = parse(programme['end'])
                                endtime = endtime.replace(tzinfo=None)
                                if (utcdatetime >= starttime) & (utcdatetime <
                                                                 endtime):
                                    pid = programme['programme']['pid']
                                    title = programme['programme'][
                                        'display_titles']['title']
                                    # Fix for unicode errors caused by some /programmes titles
                                    if (not isinstance(title, str)) and (
                                            not isinstance(title, unicode)):
                                        title = str(title)
                                    # Has to assume no offset between scheduled and actual programme start time as it knows no better because of the lack of DVB bridge
                                    progdate = parse(programme['start'])
                                    tz = progdate.tzinfo
                                    utcoffset = datetime.strptime(
                                        str(tz.utcoffset(progdate)),
                                        "%H:%M:%S")
                                    utcoffset = utcoffset.hour * 60 * 60
                                    timestamp = sleeper.mktime(
                                        progdate.timetuple()) - utcoffset
                                    print[
                                        pid, title, 0, programme['duration'],
                                        programme['start'], utcoffset
                                    ]
                                    data = [
                                        pid, title, 0, programme['duration'],
                                        timestamp, utcoffset
                                    ]
Example #43
 def map_hashtag_to_locFreq(self, key, line):
     if False: yield  # unreachable yield: keeps this mapper a generator even though it only fills self.hashtagdict
     listt = cjson.decode(line)
     for [uid, freq] in listt[1].iteritems():
         self.hashtagdict[uid][listt[0]] = freq
Example #44
import find_mk_params, sys, os, re, cjson

mkout_dir = find_mk_params.run_name + '/mktest_out/'
gm = cjson.decode(open(find_mk_params.run_name + '/genemaps.json').read())
sn = cjson.decode(open(find_mk_params.run_name + '/species_names.json').read())
geneFamilies = gm['geneFamilies']
geneToSpecies = gm['geneToSpecies']
outf = open(find_mk_params.run_name + '/alpha_values.txt', 'w')
outf.write("K12Gene\tGene\tAlpha\tDn\tDs\tPn\tPs\n")
for gf in geneFamilies:
    gf.sort()
    outfname = mkout_dir + str(repr(gf).__hash__()) + '.txt'
    try:
        outfd = open(outfname)
        mktest_results = outfd.read()
    except Exception as e:
        sys.stderr.write("Couldn't read output for %s (%s)\n" % (outfname, e))
        continue
    fixedAS = re.findall('#Fixed\s+(\d+)\s+(\d+)', mktest_results)
    polyAS = re.findall('#Poly\s+(\d+)\s+(\d+)', mktest_results)
    if len(fixedAS) != 1:
        sys.stderr.write("File %s has %i #Fixed lines\n" %
                         (outfname, len(fixedAS)))
        continue
    if len(polyAS) != 1:
        sys.stderr.write("File %s has %i #Poly lines\n" %
                         (outfname, len(polyAS)))
        continue
    Dn, Ds = fixedAS[0]
    Pn, Ps = polyAS[0]
    Dn, Ds, Pn, Ps = map(int, (Dn, Ds, Pn, Ps))
Example #45
def print_dict(dictionary, outfile, ident=''):
    for key, value in dictionary.iteritems():
        if isinstance(value, dict):
            outfile.write('%s%s\n' % (ident, key))
            print_dict(value, outfile, ident + '  ')
        else:
            outfile.write(ident + '%s = %s\n' % (key, value))


#G=nx.DiGraph()
#G.add_node('ROOT')

#args rootdict  unitags    gpd    folk removelist
infile5 = sys.argv[5]
infile5 = open(infile5, 'r')
removelist = cjson.decode(infile5.readline())

taglist = []
infile1 = sys.argv[2]  #'/spare/wei/local/uni_tag_ch3'
#infile1='./link-dict_lt10'
infile1 = open(infile1, 'r')
taglist = cjson.decode(infile1.readline())
taglist = taglist.keys()

#infile='/spare/wei/root_dict_en_0.1_l1-3'
infile = sys.argv[1]  #'/spare/wei/local/root_dict_ch3-log'
#infile='/spare/wei/root_dict_en_log_0.1_v2'
#infile='root_dict_lt10_schz_0.6'
infile = open(infile, 'r')
graph_parent_dict = {}
graph_parent_dict_s = {}
Example #46
                    try:
                        homedir = os.path.expanduser("~")
                        file = open(homedir + "/twitter-login.conf", 'r')
                        save = True
                    except IOError, e:
                        print(
                            "Failed to load config file - not saving oauth keys: "
                            + str(e))

                    if save:
                        raw_config = file.read()

                        file.close()

                        # Read config and add new values
                        config = cjson.decode(raw_config)
                        config['key'] = access_token['oauth_token']

                        config['secret'] = access_token['oauth_token_secret']

                        raw_config = cjson.encode(config)

                        # Write out the new config file
                        try:
                            file = open(homedir + "/twitter-login.conf", 'w')
                            file.write(raw_config)
                            file.close()
                        except IOError, e:
                            print("Failed to save oauth keys: " + str(e))

                    self.keypair = [
Example #47
import cjson
from collections import defaultdict
from operator import itemgetter
gpdp = '../backbone1/gpd_%s'
bb = defaultdict(dict)
bbb = defaultdict(dict)
ll = [
    'atlanta', 'new_york', 'la', 'seatle', 'houston', 'dallas', 'indiana',
    'miami', 'sf', 'chicago'
]
out1 = open('cnts', 'w')
out2 = open('cnts1', 'w')
for key in ll:
    gpd = gpdp % key
    inf = open(gpd, 'r')
    gpddict = cjson.decode(inf.readline())
    for key, value in gpddict.iteritems():
        #bb[key][value]=bb[key].get(value,0)+1
        bb[key][value[0]] = bb[key].get(value[0], 0) + 1
        bbb[key][value[0]] = bbb[key].get(value[0], 0) + value[1]
bb1 = defaultdict(list)
cnt = 0
for key, value in bb.iteritems():
    xxx = sorted(value.items(), key=itemgetter(1), reverse=1)
    prb = sorted(bbb[key].items(), key=itemgetter(1), reverse=1)
    #    print key,xxx
    print >> out1, key, xxx, prb
    if len(xxx) >= 2 and xxx[0][1] == xxx[1][1] and xxx[0][1] >= 2:
        #print>>out2,key,xxx,prb
        eqlist = []
        for item in xxx:
Example #48
 def load(self):
     data = self.file.read()
     return json.decode(data)
Example #49
 def decode(self, response_object):
     #return self._decode_dates(json.load(response_object))
     return self._decode_dates(
         cjson.decode(unicode(response_object.read(),
                              'utf-8')))  #@UndefinedVariable
Example #50
# {
# "connected":"69",
# "when":["Morning","Noon"],
# "match":"one"
# }
# endmsg
# end
# parameters are separated by comas. First one
# is connected value to trigger, other ones are
# one or more periods where state must become "pushed"
import Crossfire
import string
from CFTimeOfDay import TimeOfDay
import cjson
event = Crossfire.WhatIsEvent()
parameters = cjson.decode(event.Message)
alreadymatched = (event.Value != 0)
connected = int(parameters["connected"])
inverse = "inverse" in parameters and parameters["inverse"] == True
match = False
if not "match" in parameters:
    Crossfire.Log(
        Crossfire.LogError,
        "Script push_period.py didn't get a 'match' parameter. Only got %s" %
        parameters)
elif parameters["match"].lower() == "one":
    match = TimeOfDay().matchAny(parameters["when"]) != inverse
elif parameters["match"].lower() == "all":
    match = TimeOfDay().matchAll(parameters["when"]) != inverse
else:
    Crossfire.Log(
Example #51
############################## Load genemaps.json
import itertools
import cjson
gm=cjson.decode(open('genemaps.json').read())
geneFamilies=gm['geneFamilies']
geneToSpecies=gm['geneToSpecies']
geneToOrthologs=gm['geneToOrthologs']

############################## Upload genemaps.json to sqlite
import sqlite3
conn=sqlite3.connect('example.db')
c=conn.cursor()
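# The INSERT below presumes a Genes table with four columns; a hedged sketch
# of the implied schema (the column names here are assumptions):
c.execute('CREATE TABLE IF NOT EXISTS Genes '
          '(gene TEXT, species TEXT, orthologs TEXT, ontology TEXT)')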
def entryFromGene(g):
    species = geneToSpecies[g]
    orthlgs = ' '.join(geneToOrthologs[g])
    ontl = None
    return (g, species, orthlgs, ontl)
c.executemany('INSERT INTO Genes VALUES (?,?,?,?)', itertools.imap(entryFromGene,geneToSpecies.iterkeys()))
conn.commit()

############################## Add nucleotide_seq_fasta and uniprot_xml TEXT columns
import sqlite3
conn=sqlite3.connect('example.db')
c=conn.cursor()
c.execute('ALTER TABLE Genes ADD COLUMN nucleotide_seq_fasta TEXT')
c.execute('ALTER TABLE Genes ADD COLUMN uniprot_xml TEXT')
conn.commit()

############################## Add fasta sequences
import itertools, cjson, sqlite3
gs=cjson.decode(open('gene_sequences.json').read())
Example #52
def deserialize(msg):
    return cjson.decode(msg)
Example #53
def genotype_str(genotype):
    return fold(operator.add, [allele * count for allele, count in genotype])


if __name__ == '__main__':

    ploidy = 2  # assume ploidy 2 for all individuals and all positions

    potential_alleles = ['A', 'T', 'G', 'C']

    # genotypes are expressed as sets of allele frequencies
    genotypes = list_genotypes_to_count_genotypes(
        list(multiset.multichoose(ploidy, potential_alleles)))

    for line in sys.stdin:
        position = cjson.decode(line)
        #print position['position']
        samples = position['samples']

        position['coverage'] = sum([
            len(sample['alleles'])
            for samplename, sample in samples.iteritems()
        ])

        #potential_alleles = ['A','T','G','C']
        potential_alleles = set()
        for samplename, sample in samples.items():
            # only process snps and reference alleles
            alleles = [
                allele for allele in sample['alleles']
                if allele['type'] in ['reference', 'snp']
Example #54
    0,
    'num': 250,
    'noIL': 1,
    'restype': 'company',
    'sortas': 'MarketCap',
    'q': '[(exchange == "TSE") & (market_cap >= 1000000000) & (dividend_yield >= 3) & (dividend_recent_quarter > 0) & (last_price > 0)]'
}
response = requests.get(url, params=payload)

# Parse response using cjson
json_response = cjson.decode(response.text)

pdb.set_trace()

# Create list of stocks - [cid, symbol, name]
symbols = []
for company in json_response['searchresults']:
    # Dynamically load named values into a dictionary
    values = {}
    for column in company['columns']:
        values[column['field']] = column['value']

    # Cast yield, dividend and price
    dividend_yield = float(values['DividendYield']) / 100
    price = float(values['QuoteLast'])
    dividend = float(values['DividendRecentQuarter'])
Example #55
    def doStuff(self, channel):
        # Check what's on for each channel
        self.send(channel, "whatson")
        while not self.dataReady("whatson"):
            pass
        data = self.recv("whatson")
        if data == None:
            pid = None
        else:
            pid = data[0]
            title = data[1]
            offset = data[2]
            duration = data[3]
            expectedstart = data[4]
        if pid != self.channels[channel]:
            # Perhaps just do a duplicate scan before creating Twitter stream
            if pid == None:
                self.channels[channel] = None
                print(channel + ": Off Air")
            else:
                self.channels[channel] = pid
                self.send(["http://www.bbc.co.uk/programmes/" + pid + ".rdf"],
                          "dataout")
                while not self.dataReady("datain"):
                    pass
                recvdata = self.recv("datain")

                if recvdata[0] == "OK":
                    programmedata = recvdata[1]
                else:
                    # Fake programme data to prevent crash - not ideal
                    programmedata = '<?xml version="1.0" encoding="utf-8"?> \
                                    <rdf:RDF xmlns:rdf      = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" \
                                             xmlns:rdfs     = "http://www.w3.org/2000/01/rdf-schema#" \
                                             xmlns:owl      = "http://www.w3.org/2002/07/owl#" \
                                             xmlns:foaf     = "http://xmlns.com/foaf/0.1/" \
                                             xmlns:po       = "http://purl.org/ontology/po/" \
                                             xmlns:mo       = "http://purl.org/ontology/mo/" \
                                             xmlns:skos     = "http://www.w3.org/2008/05/skos#" \
                                             xmlns:time     = "http://www.w3.org/2006/time#" \
                                             xmlns:dc       = "http://purl.org/dc/elements/1.1/" \
                                             xmlns:dcterms  = "http://purl.org/dc/terms/" \
                                             xmlns:wgs84_pos= "http://www.w3.org/2003/01/geo/wgs84_pos#" \
                                             xmlns:timeline = "http://purl.org/NET/c4dm/timeline.owl#" \
                                             xmlns:event    = "http://purl.org/NET/c4dm/event.owl#"> \
                                    </rdf:RDF>'

                # RDF reader needs to read from a file so write out first
                # Alternative is to read from a URL, but this lacks proper proxy support
                filepath = "tempRDF.txt"
                file = open(filepath, 'w')
                file.write(programmedata)
                file.close()

                g = Graph()
                # This is a temporary proxy fix. A URL could be put here instead
                g.parse("tempRDF.txt")

                # Identify the brand and whether there are any official hashtags
                twittags = list()
                for bid in g.subjects(object=rdflib.URIRef(
                        'http://purl.org/ontology/po/Brand')):
                    # bid is Brand ID
                    bidmod = bid.replace("#programme", "")
                    bidmod = str(bidmod.replace("file:///programmes/", ""))
                    if self.officialbrandtags.has_key(bidmod):
                        twittags = self.officialbrandtags[bidmod]
                        break

                # Identify the series and whether there are any official hashtags
                if len(twittags) == 0:
                    for sid in g.subjects(object=rdflib.URIRef(
                            'http://purl.org/ontology/po/Series')):
                        # sid is Series ID
                        sidmod = sid.replace("#programme", "")
                        sidmod = str(sidmod.replace("file:///programmes/", ""))
                        if self.officialseriestags.has_key(sidmod):
                            twittags = self.officialseriestags[sidmod]
                            break

                vidmod = ""
                so = g.subject_objects(predicate=rdflib.URIRef(
                    'http://purl.org/ontology/po/version'))
                # Pick a version, any version - for this purpose it doesn't matter which one
                for x in so:
                    # vid is version id
                    vid = x[1]
                    vidmod = vid.replace("#programme", "")
                    vidmod = vidmod.replace("file:///programmes/", "")
                    break

                # Got version, now get people

                self.send(
                    ["http://www.bbc.co.uk/programmes/" + vidmod + ".rdf"],
                    "dataout")
                while not self.dataReady("datain"):
                    pass
                recvdata = self.recv("datain")
                if recvdata[0] == "OK":
                    versiondata = recvdata[1]
                else:
                    versiondata = '<?xml version="1.0" encoding="utf-8"?> \
                                    <rdf:RDF xmlns:rdf      = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" \
                                             xmlns:rdfs     = "http://www.w3.org/2000/01/rdf-schema#" \
                                             xmlns:owl      = "http://www.w3.org/2002/07/owl#" \
                                             xmlns:foaf     = "http://xmlns.com/foaf/0.1/" \
                                             xmlns:po       = "http://purl.org/ontology/po/" \
                                             xmlns:mo       = "http://purl.org/ontology/mo/" \
                                             xmlns:skos     = "http://www.w3.org/2008/05/skos#" \
                                             xmlns:time     = "http://www.w3.org/2006/time#" \
                                             xmlns:dc       = "http://purl.org/dc/elements/1.1/" \
                                             xmlns:dcterms  = "http://purl.org/dc/terms/" \
                                             xmlns:wgs84_pos= "http://www.w3.org/2003/01/geo/wgs84_pos#" \
                                             xmlns:timeline = "http://purl.org/NET/c4dm/timeline.owl#" \
                                             xmlns:event    = "http://purl.org/NET/c4dm/event.owl#"> \
                                    </rdf:RDF>'

                filepath = "tempRDF.txt"
                file = open(filepath, 'w')
                file.write(versiondata)
                file.close()

                g = Graph()
                g.parse("tempRDF.txt")

                # Identify whether this is a change of programme, or the first time we've checked what's on (for print clarity)
                if self.firstrun:
                    print(channel + ": " + title)
                else:
                    print(channel + ": Changed to - " + title)

                # Minor alterations
                title = title.replace("&", "and")

                if ":" in title:
                    titlebits = title.split(":")
                    title = titlebits[0]

                # Saving a copy here so apostrophes etc can be used in the Twitter people search
                titlesave = title

                # Remove punctuation
                for item in """!"#$%()*+,-./:;<=>?@[\\]^_'`{|}~""":
                    title = title.replace(item, "")

                keywords = dict()
                # Save keywords next to a descriptor of what they are
                keywords[pid] = "PID"

                # Add official hashtags to the list
                for tag in twittags:
                    keywords[tag] = "Twitter"

                # Duplicates will be removed later
                # If the title has 'The' in it, add hashtags both with and without the 'the' to the keyword list
                # This simply broadens the list of search terms
                if string.find(title, "The", 0, 3) != -1:
                    # Strip the leading "The " (this must happen before
                    # whitespace is collapsed, or the trailing space can
                    # never match)
                    newtitle = string.replace(title, "The ", "", 1)
                    keywords[channel] = "Channel"
                    keywords["#" +
                             string.lower(re.sub("\s+", "", title))] = "Title"
                    # Check for and remove the year too - some programmes
                    # contain a year which may be undesirable from a search
                    # point of view. Remove " <year>" before collapsing
                    # whitespace so its leading space can still match
                    keywords["#" + re.sub(
                        "\s+", "",
                        string.replace(string.lower(title),
                                       " " + str(date.today().year), "",
                                       1))] = "Title"
                    keywords[
                        '#' +
                        string.lower(re.sub("\s+", "", newtitle))] = "Title"
                    # Check for and remove the year too
                    keywords['#' + re.sub(
                        "\s+", "",
                        string.replace(string.lower(newtitle),
                                       " " + str(date.today().year), "",
                                       1))] = "Title"
                else:
                    keywords[channel] = "Channel"
                    keywords["#" +
                             string.lower(re.sub("\s+", "", title))] = "Title"
                    keywords["#" + re.sub(
                        "\s+", "",
                        string.replace(string.lower(title),
                                       " " + str(date.today().year), "",
                                       1))] = "Title"

                allwordtitle = string.replace(title, "The ", "", 1)
                allwordtitle = allwordtitle.lower()
                # Remove current year from events
                allwordtitle = allwordtitle.replace(
                    " " + str(date.today().year), "", 1)
                titlewords = allwordtitle.split()
                if len(titlewords) > 1:
                    keywords[allwordtitle] = "Title"
                else:
                    # Trial fix for issue of one word titles producing huge amounts of data
                    # This occurs for keywords like 'Weather' and 'Breakfast' which aren't BBC limited terms
                    keywords[allwordtitle + "^" + "bbc"] = "Title"
                keywords["#" + re.sub("\s+", "", allwordtitle)] = "Title"

                # Where a channel uses text for a number, we also want to search using the numeric representation
                numwords = dict({
                    "one": 1,
                    "two": 2,
                    "three": 3,
                    "four": 4,
                    "five": 5,
                    "six": 6,
                    "seven": 7
                })
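                # e.g. "bbcone" <-> "bbc1": also search using the alternate
                # numeric/word form of the channel name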
                for word in numwords:
                    # "asiannetwork" contains "two" (ne-two-rk); substituting
                    # would produce "asianne2rk", hence the exclusion
                    if word in channel.lower() and channel != "asiannetwork":
                        numchannel = string.replace(channel.lower(), word,
                                                    str(numwords[word]))
                        keywords[numchannel] = "Channel"
                        break
                    if str(numwords[word]) in channel.lower():
                        numchannel = string.replace(channel.lower(),
                                                    str(numwords[word]), word)
                        keywords[numchannel] = "Channel"
                        break

                # Load NameCache (people we've already searched for on Twitter to avoid hammering PeopleSearch)
                save = False
                try:
                    homedir = os.path.expanduser("~")
                    file = open(homedir + "/namecache.conf", 'r')
                    save = True
                except IOError, e:
                    print(
                        "Failed to load name cache - will attempt to create a new file: "
                        + str(e))

                if save:
                    raw_config = file.read()
                    file.close()
                    try:
                        config = cjson.decode(raw_config)
                    except cjson.DecodeError, e:
                        config = dict()
                else:
                    config = dict()

                # Find people's names in retrieved RDF
                s = g.subjects(
                    predicate=rdflib.URIRef(
                        'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
                    object=rdflib.URIRef('http://purl.org/ontology/po/Role'))

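                # For each node typed po:Role: find the credit pointing at it
                # via po:role, follow po:participant to the person, then read
                # the FOAF given/family names used for the people search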
                for x in s:
                    rid = g.value(predicate=rdflib.URIRef(
                        'http://purl.org/ontology/po/role'),
                                  object=rdflib.BNode(x))
                    pid = g.value(
                        subject=rdflib.BNode(rid),
                        predicate=rdflib.URIRef(
                            'http://purl.org/ontology/po/participant'))
                    firstname = str(
                        g.value(subject=rdflib.BNode(pid),
                                predicate=rdflib.URIRef(
                                    'http://xmlns.com/foaf/0.1/givenName')))
                    lastname = str(
                        g.value(subject=rdflib.BNode(pid),
                                predicate=rdflib.URIRef(
                                    'http://xmlns.com/foaf/0.1/familyName')))

                    if config.has_key(firstname + " " + lastname):
                        # Found a cached value - this person has been searched for using Twitter
                        if config[firstname + " " + lastname] != "":
                            keywords[config[firstname + " " +
                                            lastname]] = "Twitter"
                    else:
                        # Not cached yet - new request to Twitter people search
                        self.send(firstname + " " + lastname, "search")
                        while not self.dataReady("search"):
                            pass
                        twitdata = self.recv("search")
                        screenname = ""
                        try:
                            for user in twitdata:
                                # Only use this Twitter screen name if there's a good chance they're the person we're after
                                if user.has_key('verified'):
                                    if (user['verified'] == True
                                            or user['followers_count'] > 10000
                                        ) and string.lower(
                                            user['name']) == string.lower(
                                                firstname + " " + lastname):
                                        screenname = user['screen_name']
                                        keywords[screenname] = "Twitter"
                                        break
                        except AttributeError, e:
                            pass
                        config[firstname + " " + lastname] = screenname
                    keywords[firstname + " " + lastname] = "Participant"
Exemple #56
0
 def read_json_yield_uid(line):
     line = cjson.decode(line)
     user_id = line['user_id']
     tags = line['tag']
     for tag in tags:
         yield user_id, tag
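 # Usage sketch (illustrative, not part of the original): stream a file of
 # one-JSON-object-per-line records and flatten them to (user_id, tag) pairs:
 # for line in open('user_tags.json'):
 #     for user_id, tag in read_json_yield_uid(line):
 #         print user_id, tag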
Exemple #57
0
def print_dict(dictionary, outfile, ident='', braces=1):

    """ Recursively prints nested dictionaries."""

    for key, value in dictionary.iteritems():
        if isinstance(value, dict):
            outfile.write('%s%s\n' % (ident, key))
            print_dict(value, outfile, ident + '  ')
        #else:
        #    outfile.write(ident+'%s = %s\n' %(key, value))


infile = '../graph/gpd_hou'  #'backbone-conf-5'
#infile='../backbone1/gpd_la'
list1 = []

infile = open(infile, 'r')
infile = cjson.decode(infile.readline())
print len(infile)
#for key,value in infile.iteritems():
#    if key not in list1:
#        list1.append(key)
#    if value not in list1:
#        list1.append(value)
#print len(list1)
dict1 = {}
# Keep only the first element of each value list
for key, value in infile.iteritems():
    dict1[key] = value[0]
#if 'sport news' in dict1.keys():
#    print '------'
outfile = 'folk_hou_cost'  #'folk-backbone5'
outfile = open(outfile, 'w')
# build_tree is defined elsewhere in the original script
a = build_tree(dict1)
Exemple #58
0
 def jloads(json_string):
     """
     Deserializes ``json_string`` (a string containing a JSON document)
     to a Python object, using cjson.
     """
     return cjson.decode(json_string)
Exemple #59
0
def jloads(json_string):
    global cjson
    if cjson:
        return cjson.decode(json_string)
    else:
        return json.loads(json_string)
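# The guarded import this fallback assumes (a sketch, not from the source):
# try:
#     import cjson
# except ImportError:
#     cjson = None
#     import json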
Exemple #60
0
import cjson
import sys
from operator import itemgetter
from collections import defaultdict
from copy import deepcopy
infile = sys.argv[1]  #'/spare/wei/local/rulelist_en_ch3-log'

outfile = sys.argv[2]  #'/spare/wei/local/root_dict_ch3-log'
#infile='rulelist_lt10_schz_0.6'
#outfile='root_dict_lt10_schz_0.6'
outfile = open(outfile, 'w')
root_dict = defaultdict(list)
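# Each decoded line is a list: index 0 is the root key; indices 1-3 are
# grouped under it as one entry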
for line in open(infile, 'r'):
    line = cjson.decode(line)
    #print line[0]
    root_dict[line[0]].append([line[1], line[2], line[3]])
    #root_dict[line[1]].append([line[0],line[2],line[3],line[2]*line[3]])
sorted_root_dict = defaultdict(list)
for key, value in root_dict.iteritems():
    # Sort each root's entries by the second stored field, descending.
    # sorted() already returns a new list; deepcopy additionally keeps the
    # nested lists independent of root_dict.
    v = deepcopy(value)
    v = sorted(v, key=itemgetter(1), reverse=True)
    sorted_root_dict[key] = v
for key, value in sorted_root_dict.iteritems():
    outfile.write(cjson.encode([key, value]) + '\n')