def GenCatalogEntry(row):
    i_deps = cjson.decode(row[7])
    if i_deps:
        i_deps_str = "|".join(i_deps)
    else:
        i_deps_str = "none"
    deps_with_desc = cjson.decode(row[6])
    deps = [x[0] for x in deps_with_desc if x[0].startswith('CSW')]
    if deps:
        deps_str = '|'.join(deps)
    else:
        deps_str = "none"
    entry = representations.CatalogEntry(
        catalogname=row[0],   # 0
        version=row[1],       # 1
        pkgname=row[2],       # 2
        basename=row[3],      # 3
        md5_sum=row[4],       # 4
        size=str(row[5]),     # 5
        deps=deps_str,        # 6
        category="none",      # 7
        i_deps=i_deps_str,    # 8
        desc=row[8],          # 9
    )
    return entry
def transform_graph(self):
    """Transforms ids to the corresponding screen_name in the user_graph.txt file."""
    f = open("../semantic_analysis/data/labelled_user_dic_393.json", "r")
    user_dic = cjson.decode(f.readline())
    f.close()
    f = open("../semantic_analysis/data/labelled_id_name_map_393.json", "r")
    id_label_map = cjson.decode(f.readline())
    f.close()
    node_list, edge_list = self.read_node_edge_lists("user_graph/user_graph_393_2.txt")
    f = open("user_graph/user_graph_393_screen_name_2.txt", "w")
    f.write("#nodes-screen_name\n")
    for user in user_dic:
        f.write(user + " " + user_dic[user]["label"] + "\n")
    f.write("#edges-screen_name\n")
    for edge in edge_list:
        edge_with_name = []
        flag = 0
        for node in edge:
            if id_label_map.has_key(node):
                edge_with_name.append(id_label_map[node]["screen_name"])
            else:
                flag = 1
                break
        if flag == 0:
            f.write(edge_with_name[0] + " " + edge_with_name[1] + "\n")
    f.close()
def json_parser(source, logger=None):
    """
    JSON parser based on the cjson module. It accepts either a source
    descriptor with a .read()-supported file-like object or data as a
    string object.
    """
    if isinstance(source, InstanceType) or isinstance(source, file):
        # got data descriptor
        try:
            jsondict = cjson.decode(source.read())
        except Exception as exc:
            print_exc(exc)
            source.close()
            raise
        source.close()
    else:
        data = source
        # to prevent unicode/ascii errors like
        # UnicodeDecodeError: 'utf8' codec can't decode byte 0xbf in position
        # if isinstance(data, str):
        #     data = unicode(data, errors='ignore')
        #     res = data.replace('null', '\"null\"')
        # else:
        res = data
        try:
            jsondict = cjson.decode(res)
        except:
            msg = "json_parser, WARNING: fail to JSON'ify data:"
            msg += "\n%s\ndata type %s" % (res, type(res))
            if logger:
                logger.warning(msg)
            else:
                print msg
            jsondict = eval(res, {"__builtins__": None}, {})
    yield jsondict
def test_31_exports(self):
    # public key
    (resp, data) = self.client.request('GET', '/openssl_export', {'name': 'usercert1.pub'})
    self.assertEqual(resp.status, 200)
    try:
        data = cjson.decode(data)
    except Exception:
        fail('cannot decode cjson data')
    self.assertTrue(data.startswith('-----BEGIN PUBLIC KEY-----'))

    # private key
    (resp, data) = self.client.request('GET', '/openssl_export', {'name': 'usercert1.key'})
    self.assertEqual(resp.status, 200)
    try:
        data = cjson.decode(data)
    except Exception:
        fail('cannot decode cjson data')
    self.assertFalse(data.startswith('-----BEGIN PUBLIC KEY-----'))

    # limit cases
    # . no name
    (resp, data) = self.client.request('GET', '/openssl_export', {})
    self.assertEqual(resp.status, 400)

    # . file not found
    (resp, data) = self.client.request('GET', '/openssl_export', {'name': 'foo.bar'})
    self.assertEqual(resp.status, 404)
def process_user_graph():
    f = open("../user_dic/user_dic_09_wids_2.json", "r")
    user_dic = cjson.decode(f.readline())
    user_ids = set()
    for user in user_dic:
        user_ids.add(str(user_dic[user]["id"]))
    print len(user_dic)
    print len(user_ids)
    f.close()
    fo = open("../social_graph/graph_reduced.txt", "w")
    fo.write("#nodes\n")
    #for user in user_dic:
    #    fo.write(str(user_dic[user]["id"])+"\n")
    for user in user_ids:
        fo.write(user + "\n")
    fo.write("#edges\n")
    f = gzip.open("../social_graph/user_friends_09_total.json.gz")
    user_friends = cjson.decode(f.readline())
    print len(user_friends)
    for user in user_friends:
        if user in user_ids:
            for friend in user_friends[user]:
                if str(friend) in user_ids:
                    fo.write(user + " " + str(friend) + "\n")
        else:
            pass
    f.close()
    fo.close()
def pre_process():
    """Dump user friends for the whole month of September."""
    user_friends = {}
    f = gzip.open("../social_graph/user_friends_09.json.gz", "r")
    for line in f:
        data = cjson.decode(line)
        for key in data:
            if not user_friends.has_key(key):
                user_friends.update({key: data[key]})
            else:
                pass
    f.close()
    f = gzip.open("../social_graph/user_friends_09_2.json.gz", "r")
    for line in f:
        data = cjson.decode(line)
        for key in data:
            if not user_friends.has_key(key):
                user_friends.update({key: data[key]})
            else:
                pass
    f.close()
    f = gzip.open("../social_graph/user_friends_09_total.json.gz", "w")
    json.dump(user_friends, f)
    f.close()
def mqCallback(self, channel, method_frame, header_frame, body):
    try:
        if not self.zk.is_proxy_master():
            return  # master's business
        data_dict = cjson.decode(body)
        # ** MUST ** ack
        channel.basic_ack(method_frame.delivery_tag)
        utils.log(utils.cur(), body, data_dict)
        if not isinstance(data_dict, dict):
            return
        for db, forbid in data_dict.iteritems():
            if not forbid[Forbid.KEY_TYPE] in (Forbid.FORBID_WORKING, Forbid.FORBID_FOREVER):
                return
            forbid[Forbid.KEY_START] = time.time()
            path = os.path.join(ZKConf.ZK_PATH_FORBID, db)
            orig = self.get_path(path)
            if orig is False:
                self.zk.mknode(path, cjson.encode(forbid))
            else:
                old = cjson.decode(orig)
                if (old[Forbid.KEY_TYPE] == forbid[Forbid.KEY_TYPE]
                        and old[Forbid.KEY_TYPE] == Forbid.FORBID_WORKING
                        and old[Forbid.KEY_START] + old[Forbid.KEY_DURATION] > time.time()):
                    utils.log(utils.cur(), "still forbidding")
                else:
                    utils.log(utils.cur(), "change forbid")
                    # change /database/forbid/db
                    self.forbidinfo[db] = forbid
                    self.zk.set(path, cjson.encode(forbid))
    except Exception, err:
        utils.err(utils.cur(), err)
def _eval_response(self, r):
    #print '_eval_response r', r
    try:
        json = decode(r)
    except DecodeError:
        # This is for the case of, e.g.,
        # {"status":"Ok.","response":{1:"I1\n."}}
        # where cjson won't let us use 1 as a key.
        import re
        p = re.compile('\{(\d+)\:')
        for m in p.finditer(r):
            r = r.replace(m.group(0), '{"n%s":' % m.group(1))
        #print '_eval_response r modified', r
        json = decode(r)
    if json.get('status') == 'Ok.':
        d = {}
        pairs = json.get('response', "NULL.")
        #if not pairs in ["NULL.","PONG."]:
        if isinstance(pairs, dict):
            for k, v in pairs.items():
                d[self._if_numeric(k)] = self._loads(v)
        if d.keys():
            json['response'] = d
        else:
            json['response'] = pairs
    return json
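# A minimal standalone sketch (not from the original module) of the numeric-key
# workaround used in _eval_response() above: python-cjson rejects bare integer
# keys such as {1:"..."}, so they are rewritten to string keys before decoding.
# The sample payload and the helper name are illustrative only.
import re
import cjson

def demo_fix_numeric_keys(raw):
    # e.g. '{"status":"Ok.","response":{1:"I1."}}' -> '{"status":"Ok.","response":{"n1":"I1."}}'
    p = re.compile('\{(\d+)\:')
    for m in p.finditer(raw):
        raw = raw.replace(m.group(0), '{"n%s":' % m.group(1))
    return cjson.decode(raw)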
def main():
    n_sample = 0
    filteredData = open(FILTERED_DATA_PATH_JSON, 'r')
    for line in filteredData:
        # parsing tweets data file to generate a graph by adding edges
        data = cjson.decode(line)
        n_sample += 1
        if data["postId"] not in GlobalPostUserDict:
            GlobalPostUserDict[data["postId"]] = data["ownerUserId"]
        accumulateTags(data["tags"])
    n_feature = len(GlobalTagList)
    global clusterData
    clusterData = np.zeros(shape=(n_sample, n_feature), dtype=float)
    labels = []
    sampleId = 0
    filteredData = open(FILTERED_DATA_PATH_JSON, 'r')
    for line in filteredData:
        # second pass: build the feature matrix row by row
        data = cjson.decode(line)
        labels.append(data["postId"])
        processrecord(data, sampleId)
        sampleId += 1
    clustersize = 100
    createKcluster(clusterData, clustersize, len(labels), labels)
def prediction_by_major_topic(self, rulelistfile, ground_truth_file, user_tag_dict_file, tfd, cosine):
    #def prediction_by_strong_rule(self,rulelistfile,ground_truth_file,user_tag_dict_file):
    # order the rules of each existing tag in decreasing order in one list.
    user_tag_dict = {}
    ground_truth = defaultdict(dict)
    rule_by = defaultdict(list)
    rule_by1 = defaultdict(dict)
    #candidate=[]
    for line in open(user_tag_dict_file, 'r'):
        line = cjson.decode(line)
        user_tag_dict[line[0]] = line[1]
    for line in open(ground_truth_file, 'r'):
        line = cjson.decode(line)
        ground_truth[line[0]] = line[1]
    for line in open(rulelistfile, 'r'):
        line = cjson.decode(line)
        if line[2] > 0.5:
            rule_by[line[0]].append(line)
            rule_by1[line[0]][line[1]] = line[2]
    tagfile = open(tfd, 'r')
    tfd = cjson.decode(tagfile.readline())
    cosine = open(cosine, 'r')
    cosine = cjson.decode(cosine.readline())
    # if line[0] in ground_truth and line[1] not in ground_truth and line[2]>0.5:
    #     rule_by[line[0]].append(line)
    #     candidate.append(line[1])
    #for key in rule_by:
    #    rule_by[key]=sorted(rule_by[key],key=itemgetter(2),reverse=1)
    #print rule_by[1]
    cnt = 0
    cntt = 0
    cntp = 0
    for user, utd in user_tag_dict.iteritems():
        candidate = []
        for key in utd:
            for rule in rule_by[key]:
                if rule[1] not in utd:
                    candidate.append([rule[0], rule[1]])
        canscore = []
        for can in candidate:
            score = 0
            for item in utd:
                # can is a [lhs_tag, candidate_tag] pair; look up similarity by the candidate tag
                cosinev = cosine.get(can[1] + '_' + item, 0)
                cosineq = cosine.get(item + '_' + can[1], 0)
                # score each candidate with sum(sim(k,j)*log(f(j)))
                score += (cosinev + cosineq) * log(utd[item])
            canscore.append([can[1], score])
        #print canscore
        #print ground_truth[user]
        if canscore != []:
            tag = sorted(canscore, key=itemgetter(1), reverse=1)[0]
            #print tag
            cnt += 1
            if tag[0] in ground_truth[user]:
                cntt += 1
                cntp += ground_truth[user][tag[0]]
    print cnt, cntt, cntp
def decode(data):
    if not data:
        return data
    # return json.loads(data)
    try:
        return cjson.decode(data, encoding='utf-8')
    except cjson.DecodeError:
        return cjson.decode(data)
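# Hedged usage sketch for the decode() helper above. It assumes the cjson build
# in use accepts the encoding keyword (as the helper implies); when that first
# call raises cjson.DecodeError, the plain cjson.decode() fallback is tried.
# The payloads are illustrative, not data from the original project.
if __name__ == '__main__':
    print decode('')                  # falsy input is returned unchanged
    print decode('{"answer": 42}')    # -> {'answer': 42}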
def thrash_cjson():
    for obj, tns, json, msh in TESTS:
        if TEST_DUMP_ONLY:
            cjson.encode(obj)
        elif TEST_LOAD_ONLY:
            assert cjson.decode(json) == obj
        else:
            assert cjson.decode(cjson.encode(obj)) == obj
def webPlaylist():
    """Choose the next song, given a current song and a list of forbidden songs"""
    seeds = json.decode(flask.request.form.get("seeds", ""))
    not_list = json.decode(flask.request.form.get("not_list", ""))
    return json.encode(playlist.nextSong(seeds, not_list))
def __call__(self, value=None):
    if value is None or value == '':
        raise ValidationError(message=self.message, code=self.code)
    import cjson
    try:
        cjson.decode(value)
    except:
        raise ValidationError(message=self.message, code=self.code)
def iterateJsonFromFile(file, remove_params_dict=False):
    for line in open(file):
        try:
            if not remove_params_dict:
                yield cjson.decode(line)
            else:
                data = cjson.decode(line)
                if 'PARAMS_DICT' not in data:
                    yield data
        except:
            pass
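# Hedged usage sketch for iterateJsonFromFile() above: the function expects one
# JSON document per line and silently skips lines that fail to decode.
# 'records.jsonl' is a hypothetical file name, not one from the original code.
if __name__ == '__main__':
    for record in iterateJsonFromFile('records.jsonl'):
        print record
    # With remove_params_dict=True, records containing a top-level
    # 'PARAMS_DICT' key are filtered out.
    for record in iterateJsonFromFile('records.jsonl', remove_params_dict=True):
        print record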
def main(prefix, date):
    json = open(os.path.join(json_path, '%s_%s.json' % (prefix, date)))
    pos = open(os.path.join(json_path, '%s_positions.json' % prefix))
    timestamp, nodes, ways, rels, tags = cjson.decode(json.readline())
    positions = cjson.decode(pos.readline())
    make_pickles(prefix, date, nodes, ways, rels, tags, positions)
def webPlaylist():
    '''Choose the next song, given a current song and a list of forbidden songs'''
    seeds = json.decode(flask.request.form.get('seeds', ''))
    not_list = json.decode(flask.request.form.get('not_list', ''))
    return json.encode(playlist.nextSong(seeds, not_list))
def prediction_by_strong_rule_of_most_voted_tag(self, rulelistfile, ground_truth_file, user_tag_dict_file):
    # order the rules for each tag in a list of lists
    user_tag_dict = {}
    ground_truth = defaultdict(dict)
    rule_by_tag = defaultdict(list)
    for line in open(user_tag_dict_file, 'r'):
        line = cjson.decode(line)
        user_tag_dict[line[0]] = line[1]
    for line in open(ground_truth_file, 'r'):
        line = cjson.decode(line)
        ground_truth[line[0]] = line[1]
    for line in open(rulelistfile, 'r'):
        line = cjson.decode(line)
        if line[2] > 0.5:
            rule_by_tag[line[0]].append(line)
    for key in rule_by_tag:
        rule_by_tag[key] = sorted(rule_by_tag[key], key=itemgetter(2), reverse=1)
    cnt = 0
    cntt = 0
    cntp = 0
    for user, utd in user_tag_dict.iteritems():
        #print user
        #if user==34272713:
        #    print utd
        utd1 = sorted(utd.items(), key=itemgetter(1), reverse=1)
        found = False
        while found == False:
            for item in utd1:
                if item[0] in rule_by_tag:
                    rules = rule_by_tag[item[0]]
                    for rule in rules:
                        if rule[1] not in utd:
                            #print user,rule[1]
                            cnt += 1
                            if rule[1] in ground_truth[user]:
                                cntt += 1
                                cntp += ground_truth[user][rule[1]]
                                #if user=='34272713':
                                #    print user,rule
                            else:
                                #if user==61521908:
                                #    print utd
                                #print user,rule,ground_truth[user]
                                pass
                            found = True
                            break
                        else:
                            continue
                    if found == True:
                        break
                    else:
                        continue
            break
    print cnt, cntt, cntp
def hasDifference(self, account, local_json, remote_json, icloud=False):
    changed_keys = set()
    BlinkLogger().log_debug(u"Computing differences from iCloud for %s" % account.id)
    try:
        local_data = cjson.decode(local_json)
    except TypeError:
        return True
    try:
        remote_data = cjson.decode(remote_json)
    except TypeError:
        return True

    differences = DictDiffer(local_data, remote_data)

    diffs = 0
    for e in differences.changed():
        if e in self.skip_settings:
            continue
        BlinkLogger().log_debug('Setting %s has changed' % e)
        changed_keys.add(e)
        diffs += 1

    for e in differences.added():
        if e in self.skip_settings:
            continue
        BlinkLogger().log_debug('Setting %s has been added' % e)
        if not local_data.has_key(e):
            BlinkLogger().log_debug('Remote added')
        elif not remote_data.has_key(e):
            BlinkLogger().log_debug('Local added')
        changed_keys.add(e)
        diffs += 1

    for e in differences.removed():
        if e in self.skip_settings:
            continue
        BlinkLogger().log_debug('Setting %s has been removed' % e)
        if not local_data.has_key(e):
            BlinkLogger().log_debug('Local removed')
        if not remote_data.has_key(e):
            BlinkLogger().log_debug('Remote removed')
        changed_keys.add(e)
        diffs += 1

    if diffs and icloud:
        self.notification_center.post_notification(
            "iCloudStorageDidChange", sender=self,
            data=NotificationData(account=account.id, changed_keys=changed_keys))

    return bool(diffs)
def construct_directed_graph_from_rules(rules_file, tagfreqdict):
    graph = defaultdict(dict)
    tagfreqdict = cjson.decode(tagfreqdict.readline())
    for rule in rules_file:
        rule = cjson.decode(rule)
        if rule[2] > 0.3:
            graph[rule[1]][rule[0]] = -rule[2]
    for tag in tagfreqdict:
        graph['ROOT'][tag] = -0.1
    return graph
def store_document(storage, index, document):
    cluster_index = str(int(index[0:2], 16))
    # to check the validity of the document on write
    if CHECK_VALID_JSON_DOCUMENT:
        json.decode(document)
    fh = open(os.path.join(storage, cluster_index, index), "wb")
    # simple os lock system
    #fcntl.lockf(fh, fcntl.LOCK_EX)
    fh.write(document)
    fh.close()
def test_join_pool(self):
    req = join_pool_request('127.0.1.1:8080', '127.0.1.1:8081')
    data = urllib2.urlopen(req)
    self.assertEqual(
        cjson.decode(data.read()),
        [
            {"index": 0, "state": "pooling", "address": "127.0.1.1:8081"},
            {"index": 1, "state": "pooling", "address": "127.0.1.1:8080"}
        ]
    )
    req = get_pool_request('127.0.0.1:8081')
    data = urllib2.urlopen(req)
    self.assertEqual(
        cjson.decode(data.read()),
        [
            {"index": 0, "state": "pooling", "address": "127.0.1.1:8081"},
            {"index": 1, "state": "pooling", "address": "127.0.1.1:8080"}
        ]
    )
    req = get_pool_request('127.0.0.1:8080')
    data = urllib2.urlopen(req)
    self.assertEqual(
        cjson.decode(data.read()),
        [
            {"index": 0, "state": "pooling", "address": "127.0.1.1:8081"},
            {"index": 1, "state": "pooling", "address": "127.0.1.1:8080"}
        ]
    )
    req = join_pool_request('127.0.1.1:8082', '127.0.1.1:8081')
    data = urllib2.urlopen(req)
    self.assertEqual(
        cjson.decode(data.read()),
        [
            {"index": 0, "state": "pooling", "address": "127.0.1.1:8081"},
            {"index": 1, "state": "pooling", "address": "127.0.1.1:8080"},
            {"index": 2, "state": "pooling", "address": "127.0.1.1:8082"}
        ]
    )
    req = get_pool_request('127.0.0.1:8081')
    data = urllib2.urlopen(req)
    self.assertEqual(
        cjson.decode(data.read()),
        [
            {"index": 0, "state": "pooling", "address": "127.0.1.1:8081"},
            {"index": 1, "state": "pooling", "address": "127.0.1.1:8080"},
            {"index": 2, "state": "pooling", "address": "127.0.1.1:8082"}
        ]
    )
def test_30_getinfos(self):
    # CA
    (resp, data) = self.client.request('GET', '/openssl_certificateinfos', {'name': 'catest1.crt'})
    self.assertEqual(resp.status, 200)
    try:
        data = cjson.decode(data)
    except Exception:
        fail('cannot decode cjson data')
    self.assertEqual(data['CA'], 1)
    self.assertEqual(data['length'], 1024)

    # self-signed certificate
    (resp, data) = self.client.request('GET', '/openssl_certificateinfos', {'name': 'usercert1.pem'})
    self.assertEqual(resp.status, 200)
    try:
        data = cjson.decode(data)
    except Exception:
        fail('cannot decode cjson data')
    for k in ('sn', 'CA', 'length', 'fingerprint', 'validity-start', 'validity-end', 'subject', 'issuer'):
        self.assertTrue(k in data)
    for k in ('C', 'CN', 'L', 'O', 'OU', 'ST', 'emailAddress'):
        self.assertTrue(k in data['subject'])
        self.assertTrue(k in data['issuer'])
    self.assertEqual(data['CA'], 0)
    self.assertEqual(data['length'], 432)
    self.assertEqual(data['subject']['CN'], 'wiki.xivo.com')

    # CA-signed certificate
    (resp, data) = self.client.request('GET', '/openssl_certificateinfos', {'name': 'usercert4.crt'})
    self.assertEqual(resp.status, 200)
    try:
        data = cjson.decode(data)
    except Exception:
        fail('cannot decode cjson data')
    self.assertEqual(data['CA'], 0)
    self.assertNotEqual(data['subject']['emailAddress'], data['issuer']['emailAddress'])

    # limit cases
    # . file does not exist
    (resp, data) = self.client.request('GET', '/openssl_certificateinfos', {'name': 'foo'})
    self.assertEqual(resp.status, 404)
def measureDecoderThroughput(data):
    json = cjson.encode(data)
    bytes = 0
    st = time.time()
    cnt = 0
    while True:
        dt = time.time() - st
        if dt >= 0.5 and cnt > 9:
            break
        cjson.decode(json)
        bytes += len(json)
        cnt += 1
    return int(math.floor(bytes / dt / 1024.0 + 0.5))
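# Hedged usage sketch for measureDecoderThroughput() above: the sample object
# is encoded once, then decoded repeatedly for roughly half a second, and the
# result is reported in KiB/s. The sample payload below is arbitrary.
if __name__ == '__main__':
    sample = {'users': [{'id': i, 'name': 'user%d' % i} for i in range(100)]}
    print 'cjson decoder throughput: %d KiB/s' % measureDecoderThroughput(sample)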
def testDecoderExtension(self):
    re_date = re.compile('^new\sDate\(Date\.UTC\(.*?\)\)')

    def dateDecoder(json, idx):
        json = json[idx:]
        m = re_date.match(json)
        if not m:
            raise 'cannot parse JSON string as Date object: %s' % json[idx:]
        args = cjson.decode('[%s]' % json[18:m.end() - 2])
        dt = datetime.date(*args)
        return (dt, m.end())

    self.assertEqual(
        cjson.decode('[1, new Date(Date.UTC(2007,1,2)), 2]', extension=dateDecoder),
        [1, datetime.date(2007, 1, 2), 2])
    self.assertEqual(
        cjson.decode('[1, new Date(Date.UTC( 2007, 1 , 2 )) , 2]', extension=dateDecoder),
        [1, datetime.date(2007, 1, 2), 2])
    self.assertRaises(cjson.DecodeError, lambda: cjson.decode('1', extension=0))
def get_field_data_form_json(json_data, field_list=[]):
    if len(field_list) == 0:
        return cjson.decode(json_data)
    else:
        try:
            data = cjson.decode(json_data)
        except:
            data = {}
        temp_data = {}
        for i in data:
            if i in field_list:
                temp_data[i] = data[i]
        return temp_data
def GET(self, md5_sum):
    try:
        cat_gen_data = models.CatalogGenData.selectBy(md5_sum=md5_sum).getOne()
    except sqlobject.main.SQLObjectNotFound:
        raise web.notfound("RestSvr4CatalogData for %r not found" % md5_sum)
    simple_data = {
        'deps': cjson.decode(cat_gen_data.deps),
        'i_deps': cjson.decode(cat_gen_data.i_deps),
        'pkginfo_name': cat_gen_data.pkginfo_name,
        'pkgname': cat_gen_data.pkgname,
    }
    response = cjson.encode(simple_data)
    web.header('Content-Length', str(len(response)))
    return response
def testMultikeyGet(self):
    """Make a temp view."""
    be1 = CouchStub()
    be1_request = be1.expect_POST("/funstuff0/_all_docs?include_docs=true")
    be1_request.reply(200, dict(
        total_rows=2, offset=0,
        rows=[
            {"id": "a", "key": "a", "value": {"rev": "2"}, "doc": "b"},
            {"id": "c", "key": "c", "value": {"rev": "3"}, "doc": "d"}
        ]))
    be1.listen("localhost", 23456)

    be2 = CouchStub()
    be2_request = be2.expect_POST("/funstuff1/_all_docs?include_docs=true")
    be2_request.reply(200, dict(
        total_rows=2, offset=0,
        rows=[
            {"id": "b", "key": "b", "value": {"rev": "7"}, "doc": "z"},
            {"id": "y", "key": "y", "value": {"rev": "9"}, "doc": "w"}
        ]))
    be2.listen("localhost", 34567)

    resp = post("http://localhost:22008/funstuff/_all_docs?include_docs=true",
                body={"keys": ["a", "c", "x", "y"]})

    be1.verify()
    be2.verify()

    be1_post = cjson.decode(be1_request.input_body)
    be2_post = cjson.decode(be2_request.input_body)

    def lounge_hash(x):
        crc = zlib.crc32(x, 0)
        return (crc >> 16) & 0x7fff

    keys1 = be1_post['keys']
    keys2 = be2_post['keys']
    keys = {0: keys1, 1: keys2}
    num_shards = 2
    for v, k in keys.items():
        for key in k:
            self.assertEqual(lounge_hash(key) % num_shards, int(v))

    self.assertEqual(resp.body["total_rows"], 4)
    self.assertEqual(resp.body["offset"], 0)
    self.assertEqual(len(resp.body["rows"]), 4)
    rows = [x["key"] for x in resp.body["rows"]]
    rows.sort()
    self.assertEqual(rows, ["a", "b", "c", "y"])
def test_01_dhcp(self):
    (resp, data) = self.client.view(self.obj, 0)
    self.assertEqual(resp.status, 200)
    data = cjson.decode(data)
    # pprint.pprint(data)

    # EDIT
    with open('xivojson/dhcp.json') as f:
        content = cjson.decode(f.read())
        # print content
    (resp, data) = self.client.edit(self.obj, content)
    self.assertEqual(resp.status, 200)
def sjson_parser(source, logger=None):
    """
    SJSON parser based on the cjson module. Reads data line by line in
    streaming JSON format.
    """
    obj = None
    for line in source:
        if not obj:
            obj = cjson.decode(line + "]}")
        elif line.startswith("]"):
            break
        else:
            o = cjson.decode(line[1:])
            yield o
def in_box_us(lat, lon):
    # bounding-box test used by the filters below (header reconstructed from those calls):
    # contiguous US, then Alaska
    if lat > 24.52 and lat < 49.38 and lon < -66.95 and lon > -124.77:
        return 1
    elif lat > 54.66 and lat < 71.83 and ((lon < -130 and lon > -180) or (lon > 173 and lon < 180)):
        return 1
    else:
        return 0

#for line in open(infile,'r'):
#    line1=cjson.decode(line)
#    #if 'entertain' in line1['tag'] or 'entertainment' in line1['tag']:
#    lat_u=line1['user_lat']
#    lon_u=line1['user_lng']
#    lat_c=line1['list_creator_lat']
#    lon_c=line1['list_creator_lng']
#    if in_box_us(lat_u,lon_u)==1 or in_box_us(lat_c,lon_c)==1:
#        outfile1.write(line)

for line in open(infile, 'r'):
    line1 = cjson.decode(line)
    # if 'tech' in line1['tag'] or 'techy' in line1['tag'] or 'technology' in line1['tag']:
    # if 'entertain' in line1['tag'] or 'entertainment' in line1['tag'] or 'entertaining' in line1['tag'] or 'entertainer' in line1['tag']:
    if 'nutrition' in line1['tag']:
        lat_u = line1['user_lat']
        lon_u = line1['user_lng']
        lat_c = line1['list_creator_lat']
        lon_c = line1['list_creator_lng']
        if in_box_us(lat_u, lon_u) == 1 or in_box_us(lat_c, lon_c) == 1:
            outfile1.write(line)
def unpack(raw):
    # return decode(decompress(raw))
    return decode(raw)
import cjson

infile1 = '../backbone/lc/gpd_hou'
infile2 = '../backbone/graph/gpd_hou_cost'
outfile = open('./hou', 'w')
nm = open(infile1, 'r')
lc = open(infile2, 'r')
nm = cjson.decode(nm.readline())
lc = cjson.decode(lc.readline())
for key, value in lc.iteritems():
    if key in nm.keys():
        if nm[key][0] != value[0]:
            print >> outfile, key, '-', value[0], '-', nm[key][0]
    else:
        print >> outfile, '---', key, '-', value
def readUsers():
    f = open("users.passwd")
    users = f.read()
    f.close()
    users = cjson.decode(users)
    return users
# remove tags that are not in the tag_user_dict
import sys
import cjson

argv = sys.argv
# args: inputfilename unique_tag outputfilename
infile1 = open(argv[1], 'r')
infile2 = open(argv[2], 'r')
tag_f = cjson.decode(infile2.readline())
tag_f = tag_f.keys()
outfile = open(argv[3], 'w')
for line in infile1:
    line = line.split('\t')[1]
    line = cjson.decode(line)
    dictt = {}
    for key, value in line[1].iteritems():
        if key in tag_f and value > 1:
            dictt[key] = value
    outfile.write(cjson.encode([line[0], dictt]) + '\n')
if __name__ == "__main__":
    try:
        filename = '../../centrality/lc/rulelist_tfidf1-hou'  #sys.argv[1]
        rules_file = open(filename, 'r')
        tagfreqdict = open('../lc/tag_freq_dict_hou', 'r')
        root = 'ROOT'  #sys.argv[2]
    except IndexError:
        sys.stderr.write('no input and/or root node specified\n')
        sys.stderr.write('usage: python edmonds.py <file> <root>\n')
        sys.exit(1)

    G = construct_directed_graph_from_rules(rules_file, tagfreqdict)
    h = edmonds(root, G)
    outfile = open('test', 'w')
    rootdict = {}
    for s in h:
        for t in h[s]:
            rootdict[t] = s
            print >> outfile, "%s-%s" % (s, t)
    infile1 = open('../../centrality/lc/gpd_hou', 'r')
    rootdict1 = cjson.decode(infile1.readline())
    for key, value in rootdict1.iteritems():
        if rootdict[key] != value[0]:
            print key, value, rootdict[key]
# cjson:       6.57
# json:       11.86
# simplejson: 14.98
# cPickle:    30.98
# +- 0.5 seconds
try:
    import ujson
    _json_decode = ujson.decode
    _json_encode = ujson.encode
except ImportError:
    try:
        import cjson
        # You can find the patch for cjson here: http://vazor.com/media/python-cjson-1.0.5.diff
        # An unpatched version (1.0.5 is the latest as of writing)
        # won't decode that correctly and thus trigger an AssertionError here:
        assert '/' == cjson.decode('"\/"')
        _json_decode = cjson.decode
        _json_encode = cjson.encode
    except (AssertionError, ImportError):
        try:
            # But - json is slower? We will overwrite that later by simplejson. ;-)
            import json
            assert hasattr(json, "loads") and hasattr(json, "dumps")
            _json_decode = json.loads
            _json_encode = json.dumps
            has_python_json = True
        except (AssertionError, ImportError):
            has_python_json = False
            try:
                import simplejson
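# A minimal sketch (an assumption, not part of the original module) of how the
# _json_decode/_json_encode aliases selected above keep the rest of the code
# agnostic about which backend (ujson, patched cjson, json or simplejson)
# actually won the import race.
def json_roundtrip(obj):
    return _json_decode(_json_encode(obj))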
def _get_dict(self): return decode(self._get_json_string())
#http://nominatim.openstreetmap.org/reverse?format=xml&lat=52.5487429714954&lon=-1.81602098644987&zoom=18&addressdetails=1
#import requests
#r=requests.post(std)
#for line in r.iter_lines():
#    line=cjson.decode(line)
#    print line
#    print line.keys()
#    print line['address']['county']

infile = '/spare/wei/folk/listings_us'  #list_creator_user_location_nonsingular_100'
outfile = '/spare/wei/folk/list_creator_user_addr_us'
web = 'http://nominatim.openstreetmap.org/reverse?format=json'
web = 'http://open.mapquestapi.com/nominatim/v1/reverse.php?format=json'
extra = '&zoom=%2018&addressdetails=1'
outfile = open(outfile, 'w')
for line in open(infile, 'r'):
    line = cjson.decode(line)
    #print line['tag']
    if 'tech' in line['tag'] or 'technology' in line['tag']:
        # print line['tag']
        lat_1 = line['list_creator_lat']
        lon_1 = line['list_creator_lng']
        lat_2 = line['user_lat']
        lon_2 = line['user_lng']
        creator_lat = '&lat=' + str(lat_1)
        creator_lon = '&lon=' + str(lon_1)
        user_lat = '&lat=' + str(lat_2)
        user_lon = '&lon=' + str(lon_2)
        try:
            creator_url = web + creator_lat + creator_lon  #+extra
            # print creator_url
            creator_info = urllib2.urlopen(creator_url)
def retrieve_graph_from_dbpedia(term):
    assert ONLINE_ENABLED
    logger.info('online access - DBpedia: {term}'.format(term=unicode(term)))
    term_utf = term.encode('utf-8')
    term_url = quote_plus(term_utf, safe=str("/:#,()'"))
    #print '---'
    #print 'term_url', term_url
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    #query = """
    #    SELECT ?p ?o
    #    WHERE {{ <{term_url}> ?p ?o }}
    #""".format(term_url=term_url)
    query = """
        SELECT ?p ?o
        WHERE {{
            <{term_url}> ?p ?o
            FILTER( STRSTARTS(STR(?p), "{foaf}")
                || STRSTARTS(STR(?p), "{rdf}")
                || STRSTARTS(STR(?p), "{rdfs}")
                || STRSTARTS(STR(?p), "{dcterms}")
                || STRSTARTS(STR(?p), "{ontology}"))
            FILTER (isURI(?o) || langMatches(lang(?o), "EN"))
        }}
    """.format(term_url=term_url,
               foaf=unicode(FOAF),
               rdf=unicode(RDF),
               rdfs=unicode(RDFS),
               dcterms=unicode(DCTERMS),
               ontology=unicode(ONTOLOGY))
    sparql.setQuery(query.encode('utf-8'))
    sparql.setReturnFormat(JSON)
    try:
        results = sparql.query()
        # workaround for "Invalid \escape" error which can be raised by
        # convert()
        body = results.response.read()
        results = cjson.decode(body)
    except HTTPError as exc:
        # can occur if DBpedia is under maintenance (quite often)
        logger.error('Getting graph for {term} failed; {message}; {excType}'
                     .format(term=term, message=exc.message, excType=unicode(type(exc))))
        return None

    # create graph and bind relevant namespaces
    graph = Graph()
    for prefix, namespace in NAMESPACES_DICT.items():
        graph.bind(prefix, namespace)

    LITERAL_MAX_LENGTH = 600
    for result in results["results"]["bindings"]:
        try:
            p = URIRef(result['p']['value'])
            # filter wikiPageRevisionID, wikiPageExternalLike etc.
            if p.startswith(ONTOLOGY['wiki']):
                continue
            if result['o']['type'] == 'uri':
                o = URIRef(result['o']['value'])
            else:
                o = Literal(result['o']['value'])
            # if object is too long (e.g. abstract), ignore it
            if len(o) > LITERAL_MAX_LENGTH:
                continue
            graph.add((term, p, o))
            #print type(p), p
            #print type(o), o
            #print '*'
        except KeyError:
            continue

    # check if the graph is not empty
    if not graph:
        logger.warning('Retrieved empty graph for ' + unicode(term))
    return graph
# get tag_freq_dict for training data from user_tag_dict
import sys
import cjson

infilename = str(sys.argv[1])
outfilename = str(sys.argv[2])
#unique=str(sys.argv[3])
#uniquetag=open(unique,'w')
outfile = open(outfilename, 'w')
infile = open(infilename, 'r')
print infilename, outfilename
#unique_tag={}
tag_freq_dict = {}
for line in infile:
    # print line
    #line = line.split('\t')
    tagdict = cjson.decode(line)[1]
    for key, value in tagdict.iteritems():
        tag_freq_dict[key] = tag_freq_dict.get(key, 0) + 1
    #if len(tagdict[1])<5 or tagdict[-1]<9:
    #    continue
    #else:
    #    unique_tag[tagdict[0]]=tagdict[-1]
outfile.write(cjson.encode(tag_freq_dict) + '\n')
#uniquetag.write(cjson.encode(unique_tag)+'\n')
def main(self):
    while not self.finished():
        if self.dataReady("inbox"):
            channel = self.recv("inbox")
            sleeper.sleep(1)  # Temporary delay to ensure not hammering /programmes
            # Setup in case of URL errors later
            data = None
            # Define URLs for getting schedule data and DVB bridge information
            # By BBC convention, schedule info runs to 5am the next day
            if datetime.utcnow().hour < 5:
                scheduleurl = ("http://www.bbc.co.uk" + self.channels[channel][1] + "/" +
                               strftime("%Y/%m/%d", gmtime(time() - 86400)) + ".json")
            else:
                scheduleurl = ("http://www.bbc.co.uk" + self.channels[channel][1] + "/" +
                               strftime("%Y/%m/%d", gmtime(time())) + ".json")
            #syncschedurl = "http://beta.kamaelia.org:8082/dvb-bridge?command=channel&args=" + urllib.quote(self.channels[channel][0])
            #synctimeurl = "http://beta.kamaelia.org:8082/dvb-bridge?command=time"
            syncschedurl = ("http://10.92.164.147:8082/dvb-bridge?command=channel&args=" +
                            urllib.quote(self.channels[channel][0]))
            synctimeurl = "http://10.92.164.147:8082/dvb-bridge?command=time"

            # Grab SyncTV time data to work out the offset between local (NTP) and BBC time (roughly)
            self.send([synctimeurl], "dataout")
            while not self.dataReady("datain"):
                self.pause()
                yield 1
            recvdata = self.recv("datain")
            if recvdata[0] == "OK":
                content = recvdata[1]
            else:
                content = None

            # Work out time difference between local time and BBC time
            if content != None:
                try:
                    decodedcontent = cjson.decode(content)
                    if decodedcontent[0] == "OK":
                        difference = time() - decodedcontent[2]['time']
                except cjson.DecodeError, e:
                    print "cjson.DecodeError:", e.message

            if 'difference' in locals():
                # Grab actual programme start time from DVB bridge channel page
                self.send([syncschedurl], "dataout")
                while not self.dataReady("datain"):
                    self.pause()
                    yield 1
                recvdata = self.recv("datain")
                if recvdata[0] == "OK":
                    content = recvdata[1]
                else:
                    content = None

                if content != None:
                    try:
                        decodedcontent = cjson.decode(content)
                        if decodedcontent[0] == "OK":
                            proginfo = decodedcontent[2]['info']
                    except cjson.DecodeError, e:
                        print "cjson.DecodeError:", e.message

            # Grab BBC schedule data for given channel
            self.send([scheduleurl], "dataout")
            while not self.dataReady("datain"):
                self.pause()
                yield 1
            recvdata = self.recv("datain")
            if recvdata[0] == "OK":
                content = recvdata[1]
            else:
                content = None

            # Read and decode schedule
            if content != None:
                try:
                    decodedcontent = cjson.decode(content)
                except cjson.DecodeError, e:
                    print "cjson.DecodeError:", e.message

                if 'proginfo' in locals():
                    showdate = proginfo['NOW']['startdate']
                    showtime = proginfo['NOW']['starttime']
                    actualstart = proginfo['changed']
                    showdatetime = datetime.strptime(
                        str(showdate[0]) + "-" + str(showdate[1]) + "-" + str(showdate[2]) + " " +
                        str(showtime[0]) + ":" + str(showtime[1]) + ":" + str(showtime[2]),
                        "%Y-%m-%d %H:%M:%S")

                    # SyncTV (DVB Bridge) produced data - let's trust that
                    if 'decodedcontent' in locals():
                        for programme in decodedcontent['schedule']['day']['broadcasts']:
                            starttime = parse(programme['start'])
                            gmt = pytz.timezone("GMT")
                            starttime = starttime.astimezone(gmt)
                            starttime = starttime.replace(tzinfo=None)
                            # Attempt to identify which DVB bridge programme corresponds to the /programmes schedule to get PID
                            if (showdatetime == starttime or
                                    (showdatetime + timedelta(minutes=1) == starttime and
                                     string.lower(proginfo['NOW']['name']) ==
                                     string.lower(programme['programme']['display_titles']['title'])) or
                                    (showdatetime - timedelta(minutes=1) == starttime and
                                     string.lower(proginfo['NOW']['name']) ==
                                     string.lower(programme['programme']['display_titles']['title']))):
                                duration = ((proginfo['NOW']['duration'][0] * 60 * 60) +
                                            (proginfo['NOW']['duration'][1] * 60) +
                                            proginfo['NOW']['duration'][2])
                                progdate = parse(programme['start'])
                                tz = progdate.tzinfo
                                utcoffset = datetime.strptime(str(tz.utcoffset(progdate)), "%H:%M:%S")
                                utcoffset = utcoffset.hour * 60 * 60
                                # Something's not right with the code below #TODO #FIXME
                                timestamp = sleeper.mktime(showdatetime.timetuple()) + utcoffset
                                if 'difference' in locals():
                                    offset = (timestamp - actualstart) - difference
                                else:
                                    offset = timestamp - actualstart
                                pid = programme['programme']['pid']
                                title = programme['programme']['display_titles']['title']
                                # Fix for unicode errors caused by some /programmes titles
                                if (not isinstance(title, str)) and (not isinstance(title, unicode)):
                                    title = str(title)
                                print [pid, title, offset, duration, str(showdatetime) + " GMT", utcoffset]
                                data = [pid, title, offset, duration, timestamp, utcoffset]
                else:
                    # Couldn't use the DVB Bridge, so work out what's on NOW here
                    utcdatetime = datetime.now()

                    # Analyse schedule
                    if 'decodedcontent' in locals():
                        for programme in decodedcontent['schedule']['day']['broadcasts']:
                            starttime = parse(programme['start'])
                            starttime = starttime.replace(tzinfo=None)
                            endtime = parse(programme['end'])
                            endtime = endtime.replace(tzinfo=None)
                            if (utcdatetime >= starttime) & (utcdatetime < endtime):
                                pid = programme['programme']['pid']
                                title = programme['programme']['display_titles']['title']
                                # Fix for unicode errors caused by some /programmes titles
                                if (not isinstance(title, str)) and (not isinstance(title, unicode)):
                                    title = str(title)
                                # Has to assume no offset between scheduled and actual programme start
                                # time as it knows no better because of the lack of DVB bridge
                                progdate = parse(programme['start'])
                                tz = progdate.tzinfo
                                utcoffset = datetime.strptime(str(tz.utcoffset(progdate)), "%H:%M:%S")
                                utcoffset = utcoffset.hour * 60 * 60
                                timestamp = sleeper.mktime(progdate.timetuple()) - utcoffset
                                print [pid, title, 0, programme['duration'], programme['start'], utcoffset]
                                data = [pid, title, 0, programme['duration'], timestamp, utcoffset]
def map_hashtag_to_locFreq(self, key, line):
    if False:
        yield
    listt = cjson.decode(line)
    for [uid, freq] in listt[1].iteritems():
        self.hashtagdict[uid][listt[0]] = freq
import find_mk_params, sys, os, re, cjson

mkout_dir = find_mk_params.run_name + '/mktest_out/'
gm = cjson.decode(open(find_mk_params.run_name + '/genemaps.json').read())
sn = cjson.decode(open(find_mk_params.run_name + '/species_names.json').read())
geneFamilies = gm['geneFamilies']
geneToSpecies = gm['geneToSpecies']
outf = open(find_mk_params.run_name + '/alpha_values.txt', 'w')
outf.write("K12Gene\tGene\tAlpha\tDn\tDs\tPn\tPs\n")
for gf in geneFamilies:
    gf.sort()
    outfname = mkout_dir + str(repr(gf).__hash__()) + '.txt'
    try:
        outfd = open(outfname)
        mktest_results = outfd.read()
    except Exception as e:
        sys.stderr.write("Couldn't read output for %s (%s)\n" % (outfname, e))
        continue
    fixedAS = re.findall('#Fixed\s+(\d+)\s+(\d+)', mktest_results)
    polyAS = re.findall('#Poly\s+(\d+)\s+(\d+)', mktest_results)
    if len(fixedAS) != 1:
        sys.stderr.write("File %s has %i #Fixed lines\n" % (outfname, len(fixedAS)))
        continue
    if len(polyAS) != 1:
        sys.stderr.write("File %s has %i #Poly lines\n" % (outfname, len(polyAS)))
        continue
    Dn, Ds = fixedAS[0]
    Pn, Ps = polyAS[0]
    Dn, Ds, Pn, Ps = map(int, (Dn, Ds, Pn, Ps))
def print_dict(dictionary, outfile, ident='', braces=1):
    # (header reconstructed from the full print_dict definition elsewhere in this collection)
    for key, value in dictionary.iteritems():
        if isinstance(value, dict):
            outfile.write('%s%s\n' % (ident, key))
            print_dict(value, outfile, ident + '  ')
        else:
            outfile.write(ident + '%s = %s\n' % (key, value))

#G=nx.DiGraph()
#G.add_node('ROOT')

# args: rootdict unitags gpd folk removelist
infile5 = sys.argv[5]
infile5 = open(infile5, 'r')
removelist = cjson.decode(infile5.readline())
taglist = []
infile1 = sys.argv[2]  #'/spare/wei/local/uni_tag_ch3'
#infile1='./link-dict_lt10'
infile1 = open(infile1, 'r')
taglist = cjson.decode(infile1.readline())
taglist = taglist.keys()
#infile='/spare/wei/root_dict_en_0.1_l1-3'
infile = sys.argv[1]  #'/spare/wei/local/root_dict_ch3-log'
#infile='/spare/wei/root_dict_en_log_0.1_v2'
#infile='root_dict_lt10_schz_0.6'
infile = open(infile, 'r')
graph_parent_dict = {}
graph_parent_dict_s = {}
try:
    homedir = os.path.expanduser("~")
    file = open(homedir + "/twitter-login.conf", 'r')
    save = True
except IOError, e:
    print("Failed to load config file - not saving oauth keys: " + str(e))

if save:
    raw_config = file.read()
    file.close()

    # Read config and add new values
    config = cjson.decode(raw_config)
    config['key'] = access_token['oauth_token']
    config['secret'] = access_token['oauth_token_secret']

    raw_config = cjson.encode(config)

    # Write out the new config file
    try:
        file = open(homedir + "/twitter-login.conf", 'w')
        file.write(raw_config)
        file.close()
    except IOError, e:
        print("Failed to save oauth keys: " + str(e))

self.keypair = [
import cjson
from collections import defaultdict
from operator import itemgetter

gpdp = '../backbone1/gpd_%s'
bb = defaultdict(dict)
bbb = defaultdict(dict)
ll = [
    'atlanta', 'new_york', 'la', 'seatle', 'houston', 'dallas', 'indiana',
    'miami', 'sf', 'chicago'
]
out1 = open('cnts', 'w')
out2 = open('cnts1', 'w')
for key in ll:
    gpd = gpdp % key
    inf = open(gpd, 'r')
    gpddict = cjson.decode(inf.readline())
    for key, value in gpddict.iteritems():
        #bb[key][value]=bb[key].get(value,0)+1
        bb[key][value[0]] = bb[key].get(value[0], 0) + 1
        bbb[key][value[0]] = bbb[key].get(value[0], 0) + value[1]

bb1 = defaultdict(list)
cnt = 0
for key, value in bb.iteritems():
    xxx = sorted(value.items(), key=itemgetter(1), reverse=1)
    prb = sorted(bbb[key].items(), key=itemgetter(1), reverse=1)
    # print key,xxx
    print >> out1, key, xxx, prb
    if len(xxx) >= 2 and xxx[0][1] == xxx[1][1] and xxx[0][1] >= 2:
        #print>>out2,key,xxx,prb
        eqlist = []
        for item in xxx:
def load(self):
    data = self.file.read()
    return json.decode(data)
def decode(self, response_object):
    #return self._decode_dates(json.load(response_object))
    return self._decode_dates(
        cjson.decode(unicode(response_object.read(), 'utf-8')))  #@UndefinedVariable
# {
#   "connected":"69",
#   "when":["Morning","Noon"],
#   "match":"one"
# }
# endmsg
# end
# Parameters are separated by commas. The first one is the connected value to
# trigger; the others are one or more periods where the state must become "pushed".
import Crossfire
import string
from CFTimeOfDay import TimeOfDay
import cjson

event = Crossfire.WhatIsEvent()
parameters = cjson.decode(event.Message)
alreadymatched = (event.Value != 0)
connected = int(parameters["connected"])
inverse = "inverse" in parameters and parameters["inverse"] == True
match = False
if not "match" in parameters:
    Crossfire.Log(
        Crossfire.LogError,
        "Script push_period.py didn't get a 'match' parameter. Only got %s" % parameters)
elif parameters["match"].lower() == "one":
    match = TimeOfDay().matchAny(parameters["when"]) != inverse
elif parameters["match"].lower() == "all":
    match = TimeOfDay().matchAll(parameters["when"]) != inverse
else:
    Crossfire.Log(
############################## Load genemaps.json
import itertools
import cjson
gm = cjson.decode(open('genemaps.json').read())
geneFamilies = gm['geneFamilies']
geneToSpecies = gm['geneToSpecies']
geneToOrthologs = gm['geneToOrthologs']

############################## Upload genemaps.json to sqlite
import sqlite3
conn = sqlite3.connect('example.db')
c = conn.cursor()

def entryFromGene(g):
    species = geneToSpecies[g]
    orthlgs = ' '.join(geneToOrthologs[g])
    ontl = None
    return (g, species, orthlgs, ontl)

c.executemany('INSERT INTO Genes VALUES (?,?,?,?)',
              itertools.imap(entryFromGene, geneToSpecies.iterkeys()))
conn.commit()

############################## Add nucleotide_seq_fasta and uniprot_xml TEXT columns
import sqlite3
conn = sqlite3.connect('example.db')
c = conn.cursor()
c.execute('ALTER TABLE Genes ADD COLUMN nucleotide_seq_fasta TEXT')
c.execute('ALTER TABLE Genes ADD COLUMN uniprot_xml TEXT')
conn.commit()

############################## Add fasta sequences
import itertools, cjson, sqlite3
gs = cjson.decode(open('gene_sequences.json').read())
def deserialize(msg): return cjson.decode(msg)
def genotype_str(genotype):
    return fold(operator.add, [allele * count for allele, count in genotype])

if __name__ == '__main__':
    ploidy = 2  # assume ploidy 2 for all individuals and all positions
    potential_alleles = ['A', 'T', 'G', 'C']
    # genotypes are expressed as sets of allele frequencies
    genotypes = list_genotypes_to_count_genotypes(
        list(multiset.multichoose(ploidy, potential_alleles)))
    for line in sys.stdin:
        position = cjson.decode(line)
        #print position['position']
        samples = position['samples']
        position['coverage'] = sum([
            len(sample['alleles']) for samplename, sample in samples.iteritems()
        ])
        #potential_alleles = ['A','T','G','C']
        potential_alleles = set()
        for samplename, sample in samples.items():
            # only process snps and reference alleles
            alleles = [
                allele for allele in sample['alleles']
                if allele['type'] in ['reference', 'snp']
    0,
    'num': 250,
    'noIL': 1,
    'restype': 'company',
    'sortas': 'MarketCap',
    'q': '[(exchange == "TSE") & (market_cap >= 1000000000) & (dividend_yield >= 3) & (dividend_recent_quarter > 0) & (last_price > 0)]'
}
response = requests.get(url, params=payload)

# Parse response using cjson
json_response = cjson.decode(response.text)
pdb.set_trace()

# Create list of stocks - [cid, symbol, name]
symbols = []
for company in json_response['searchresults']:
    # Dynamically load named values into a dictionary
    values = {}
    for column in company['columns']:
        values[column['field']] = column['value']

    # Cast yield, dividend and price
    dividend_yield = float(values['DividendYield']) / 100
    price = float(values['QuoteLast'])
    dividend = float(values['DividendRecentQuarter'])
def doStuff(self, channel):
    # Check what's on for each channel
    self.send(channel, "whatson")
    while not self.dataReady("whatson"):
        pass
    data = self.recv("whatson")
    if data == None:
        pid = None
    else:
        pid = data[0]
        title = data[1]
        offset = data[2]
        duration = data[3]
        expectedstart = data[4]
    if pid != self.channels[channel]:
        # Perhaps just do a duplicate scan before creating Twitter stream
        if pid == None:
            self.channels[channel] = None
            print(channel + ": Off Air")
        else:
            self.channels[channel] = pid
            self.send(["http://www.bbc.co.uk/programmes/" + pid + ".rdf"], "dataout")
            while not self.dataReady("datain"):
                pass
            recvdata = self.recv("datain")
            if recvdata[0] == "OK":
                programmedata = recvdata[1]
            else:
                # Fake programme data to prevent crash - not ideal
                programmedata = '<?xml version="1.0" encoding="utf-8"?> \
                    <rdf:RDF xmlns:rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" \
                             xmlns:rdfs = "http://www.w3.org/2000/01/rdf-schema#" \
                             xmlns:owl = "http://www.w3.org/2002/07/owl#" \
                             xmlns:foaf = "http://xmlns.com/foaf/0.1/" \
                             xmlns:po = "http://purl.org/ontology/po/" \
                             xmlns:mo = "http://purl.org/ontology/mo/" \
                             xmlns:skos = "http://www.w3.org/2008/05/skos#" \
                             xmlns:time = "http://www.w3.org/2006/time#" \
                             xmlns:dc = "http://purl.org/dc/elements/1.1/" \
                             xmlns:dcterms = "http://purl.org/dc/terms/" \
                             xmlns:wgs84_pos= "http://www.w3.org/2003/01/geo/wgs84_pos#" \
                             xmlns:timeline = "http://purl.org/NET/c4dm/timeline.owl#" \
                             xmlns:event = "http://purl.org/NET/c4dm/event.owl#"> \
                    </rdf:RDF>'

            # RDF reader needs to read from a file so write out first
            # Alternative is to read from a URL, but this lacks proper proxy support
            filepath = "tempRDF.txt"
            file = open(filepath, 'w')
            file.write(programmedata)
            file.close()

            g = Graph()
            # This is a temporary proxy fix. A URL could be put here instead
            g.parse("tempRDF.txt")

            # Identify the brand and whether there are any official hashtags
            twittags = list()
            for bid in g.subjects(object=rdflib.URIRef('http://purl.org/ontology/po/Brand')):
                # bid is Brand ID
                bidmod = bid.replace("#programme", "")
                bidmod = str(bidmod.replace("file:///programmes/", ""))
                if self.officialbrandtags.has_key(bidmod):
                    twittags = self.officialbrandtags[bidmod]
                    break

            # Identify the series and whether there are any official hashtags
            if len(twittags) == 0:
                # Identify the brand and whether there are any official hashtags
                for sid in g.subjects(object=rdflib.URIRef('http://purl.org/ontology/po/Series')):
                    # sid is Series ID
                    sidmod = sid.replace("#programme", "")
                    sidmod = str(sidmod.replace("file:///programmes/", ""))
                    if self.officialseriestags.has_key(sidmod):
                        twittags = self.officialseriestags[sidmod]
                        break

            vidmod = ""
            so = g.subject_objects(predicate=rdflib.URIRef('http://purl.org/ontology/po/version'))
            # Pick a version, any version - for this which one doesn't matter
            for x in so:
                # vid is version id
                vid = x[1]
                vidmod = vid.replace("#programme", "")
                vidmod = vidmod.replace("file:///programmes/", "")
                break

            # Got version, now get people
            self.send(["http://www.bbc.co.uk/programmes/" + vidmod + ".rdf"], "dataout")
            while not self.dataReady("datain"):
                pass
            recvdata = self.recv("datain")
            if recvdata[0] == "OK":
                versiondata = recvdata[1]
            else:
                versiondata = '<?xml version="1.0" encoding="utf-8"?> \
                    <rdf:RDF xmlns:rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" \
                             xmlns:rdfs = "http://www.w3.org/2000/01/rdf-schema#" \
                             xmlns:owl = "http://www.w3.org/2002/07/owl#" \
                             xmlns:foaf = "http://xmlns.com/foaf/0.1/" \
                             xmlns:po = "http://purl.org/ontology/po/" \
                             xmlns:mo = "http://purl.org/ontology/mo/" \
                             xmlns:skos = "http://www.w3.org/2008/05/skos#" \
                             xmlns:time = "http://www.w3.org/2006/time#" \
                             xmlns:dc = "http://purl.org/dc/elements/1.1/" \
                             xmlns:dcterms = "http://purl.org/dc/terms/" \
                             xmlns:wgs84_pos= "http://www.w3.org/2003/01/geo/wgs84_pos#" \
                             xmlns:timeline = "http://purl.org/NET/c4dm/timeline.owl#" \
                             xmlns:event = "http://purl.org/NET/c4dm/event.owl#"> \
                    </rdf:RDF>'

            filepath = "tempRDF.txt"
            file = open(filepath, 'w')
            file.write(versiondata)
            file.close()

            g = Graph()
            g.parse("tempRDF.txt")

            # Identify if this is a change of programme, or the first time we've checked what's on for print clarity
            if self.firstrun:
                print(channel + ": " + title)
            else:
                print(channel + ": Changed to - " + title)

            # Minor alterations
            title = title.replace("&", "and")
            if ":" in title:
                titlebits = title.split(":")
                title = titlebits[0]

            # Saving a copy here so apostrophes etc can be used in the Twitter people search
            titlesave = title

            # Remove punctuation
            for item in """!"#$%()*+,-./;<=>?@[\\]?_'`{|}?""":
                title = title.replace(item, "")

            keywords = dict()
            # Save keywords next to a descriptor of what they are
            keywords[pid] = "PID"

            # Add official hashtags to the list
            for tag in twittags:
                keywords[tag] = "Twitter"

            # Duplicates will be removed later
            # If the title has 'The' in it, add hashtags both with and without the 'the' to the keyword list
            # This simply broadens the list of search terms
            if string.find(title, "The", 0, 3) != -1:
                newtitle = string.replace(re.sub("\s+", "", title), "The ", "", 1)
                keywords[channel] = "Channel"
                keywords["#" + string.lower(re.sub("\s+", "", title))] = "Title"
                # Check for and remove year too - some programmes contain a year which may be undesirable from a search point of view
                keywords["#" + string.replace(string.lower(re.sub("\s+", "", title)),
                                              " " + str(date.today().year), "", 1)] = "Title"
                keywords['#' + string.lower(re.sub("\s+", "", newtitle))] = "Title"
                # Check for and remove year too
                keywords['#' + string.replace(string.lower(re.sub("\s+", "", newtitle)),
                                              " " + str(date.today().year), "", 1)] = "Title"
            else:
                keywords[channel] = "Channel"
                keywords["#" + string.lower(re.sub("\s+", "", title))] = "Title"
                keywords["#" + string.replace(string.lower(re.sub("\s+", "", title)),
                                              " " + str(date.today().year), "", 1)] = "Title"

            allwordtitle = string.replace(title, "The ", "", 1)
            allwordtitle = allwordtitle.lower()
            # Remove current year from events
            allwordtitle = allwordtitle.replace(" " + str(date.today().year), "", 1)
            titlewords = allwordtitle.split()
            if len(titlewords) > 1:
                keywords[allwordtitle] = "Title"
            else:
                # Trial fix for issue of one word titles producing huge amounts of data
                # This occurs for keywords like 'Weather' and 'Breakfast' which aren't BBC limited terms
                keywords[allwordtitle + "^" + "bbc"] = "Title"
            keywords["#" + re.sub("\s+", "", allwordtitle)] = "Title"

            # Where a channel uses text for a number, we also want to search using the numeric representation
            numwords = dict({"one": 1, "two": 2, "three": 3, "four": 4, "five": 5, "six": 6, "seven": 7})
            for word in numwords:
                if word in channel.lower() and channel != "asiannetwork":  # Bug fix! asiannetwork
                    numchannel = string.replace(channel.lower(), word, str(numwords[word]))
                    keywords[numchannel] = "Channel"
                    break
                if str(numwords[word]) in channel.lower():
                    numchannel = string.replace(channel.lower(), str(numwords[word]), word)
                    keywords[numchannel] = "Channel"
                    break

            # Load NameCache (people we've already searched for on Twitter to avoid hammering PeopleSearch)
            save = False
            try:
                homedir = os.path.expanduser("~")
                file = open(homedir + "/namecache.conf", 'r')
                save = True
            except IOError, e:
                print("Failed to load name cache - will attempt to create a new file: " + str(e))

            if save:
                raw_config = file.read()
                file.close()
                try:
                    config = cjson.decode(raw_config)
                except cjson.DecodeError, e:
                    config = dict()
            else:
                config = dict()

            # Find people's names in retrieved RDF
            s = g.subjects(predicate=rdflib.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
                           object=rdflib.URIRef('http://purl.org/ontology/po/Role'))
            for x in s:
                rid = g.value(predicate=rdflib.URIRef('http://purl.org/ontology/po/role'),
                              object=rdflib.BNode(x))
                pid = g.value(subject=rdflib.BNode(rid),
                              predicate=rdflib.URIRef('http://purl.org/ontology/po/participant'))
                firstname = str(g.value(subject=rdflib.BNode(pid),
                                        predicate=rdflib.URIRef('http://xmlns.com/foaf/0.1/givenName')))
                lastname = str(g.value(subject=rdflib.BNode(pid),
                                       predicate=rdflib.URIRef('http://xmlns.com/foaf/0.1/familyName')))
                if config.has_key(firstname + " " + lastname):
                    # Found a cached value - this person has been searched for using Twitter
                    if config[firstname + " " + lastname] != "":
                        keywords[config[firstname + " " + lastname]] = "Twitter"
                else:
                    # Not cached yet - new request to Twitter people search
                    self.send(firstname + " " + lastname, "search")
                    while not self.dataReady("search"):
                        pass
                    twitdata = self.recv("search")
                    screenname = ""
                    try:
                        for user in twitdata:
                            # Only use this Twitter screen name if there's a good chance they're the person we're after
                            if user.has_key('verified'):
                                if (user['verified'] == True or user['followers_count'] > 10000) and \
                                        string.lower(user['name']) == string.lower(firstname + " " + lastname):
                                    screenname = user['screen_name']
                                    keywords[screenname] = "Twitter"
                                    break
                    except AttributeError, e:
                        pass
                    config[firstname + " " + lastname] = screenname
                keywords[firstname + " " + lastname] = "Participant"
def read_json_yield_uid(line):
    line = cjson.decode(line)
    user_id = line['user_id']
    tags = line['tag']
    for tag in tags:
        yield user_id, tag
def print_dict(dictionary, outfile, ident='', braces=1):
    """Recursively prints nested dictionaries."""
    for key, value in dictionary.iteritems():
        if isinstance(value, dict):
            outfile.write('%s%s\n' % (ident, key))
            print_dict(value, outfile, ident + '  ')
        #else:
        #    outfile.write(ident+'%s = %s\n' %(key, value))

infile = '../graph/gpd_hou'  #'backbone-conf-5'
#infile='../backbone1/gpd_la'
list1 = []
infile = open(infile, 'r')
infile = cjson.decode(infile.readline())
print len(infile)
#for key,value in infile.iteritems():
#    if key not in list1:
#        list1.append(key)
#    if value not in list1:
#        list1.append(value)
#print len(list1)
dict1 = {}
for key, value in infile.iteritems():
    dict1[key] = value[0]
#if 'sport news' in dict1.keys():
#    print '------'
outfile = 'folk_hou_cost'  #'folk-backbone5'
outfile = open(outfile, 'w')
a = build_tree(dict1)
def jloads(json_string):
    """
    Deserializes ``json_string`` (a string containing a JSON document) to a
    Python object, using cjson.
    """
    return cjson.decode(json_string)
def jloads(json_string):
    global cjson
    if cjson:
        return cjson.decode(json_string)
    else:
        return json.loads(json_string)
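# Hedged usage example for jloads() above: both the cjson and the json backend
# return plain Python structures for this input, so callers never need to know
# which one was importable. The payload is illustrative only.
if __name__ == '__main__':
    doc = jloads('{"ok": true, "items": [1, 2, 3]}')
    print doc['ok'], doc['items']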
import cjson
import sys
from operator import itemgetter
from collections import defaultdict
from copy import deepcopy

infile = sys.argv[1]   #'/spare/wei/local/rulelist_en_ch3-log'
outfile = sys.argv[2]  #'/spare/wei/local/root_dict_ch3-log'
#infile='rulelist_lt10_schz_0.6'
#outfile='root_dict_lt10_schz_0.6'
outfile = open(outfile, 'w')
root_dict = defaultdict(list)
for line in open(infile, 'r'):
    line = cjson.decode(line)
    #print line[0]
    root_dict[line[0]].append([line[1], line[2], line[3]])
    #root_dict[line[1]].append([line[0],line[2],line[3],line[2]*line[3]])
sorted_root_dict = defaultdict(list)
for key, value in root_dict.iteritems():
    v = deepcopy(value)
    v = sorted(v, key=itemgetter(1), reverse=1)
    sorted_root_dict[key] = v
for key, value in sorted_root_dict.iteritems():
    outfile.write(cjson.encode([key, value]) + '\n')