def loading_unloading_points(request):
    """Return loading/unloading locations and the material for a transaction.

    Expects a JSON request body containing ``transaction_id``.
    """
    payload = json.loads(request.body)
    txn = Transaction.objects.get(transaction_id=payload['transaction_id'])

    loading_rows, unloading_rows = [], []
    for loc in txn.loading_unloading_location.all():
        row = [loc.address, loc.city.name]
        if loc.type == 'loading':
            loading_rows.append(row)
        elif loc.type == 'unloading':
            unloading_rows.append(row)

    def _as_records(rows):
        # DataFrame -> to_json -> loads keeps the historical output shape,
        # including the 'index' field added by reset_index().
        frame = pd.DataFrame(rows, columns=['address', 'city'])
        return json.loads(frame.reset_index().to_json(orient='records'))

    return {
        "loading": _as_records(loading_rows),
        "unloading": _as_records(unloading_rows),
        "material": txn.material,
    }
def test_decodeArrayFaultyUnicode(self):
    """ujson.loads must reject this out-of-range integer literal."""
    raised = False
    try:
        ujson.loads('[18446098363113800555]')
    except ValueError:
        raised = True
    assert raised, "expected ValueError"
def testArrayNumpyLabelled(self):
    """Decode with numpy=True, labelled=True.

    Output is a triple: (value array, row labels or None, column labels
    or None).
    """
    # Renamed 'input' -> 'data': the original shadowed the builtin.
    data = {'a': []}
    output = ujson.loads(ujson.dumps(data), numpy=True, labelled=True)
    self.assertTrue((np.empty((1, 0)) == output[0]).all())
    self.assertTrue((np.array(['a']) == output[1]).all())
    self.assertTrue(output[2] is None)

    data = [{'a': 42}]
    output = ujson.loads(ujson.dumps(data), numpy=True, labelled=True)
    self.assertTrue((np.array([42]) == output[0]).all())
    self.assertTrue(output[1] is None)
    self.assertTrue((np.array(['a']) == output[2]).all())

    # py3 is non-determinstic on the ordering......
    if not py3compat.PY3:
        data = [{'a': 42, 'b': 31}, {'a': 24, 'c': 99}, {'a': 2.4, 'b': 78}]
        output = ujson.loads(ujson.dumps(data), numpy=True, labelled=True)
        expectedvals = np.array(
            [42, 31, 24, 99, 2.4, 78], dtype=int).reshape((3, 2))
        self.assertTrue((expectedvals == output[0]).all())
        self.assertTrue(output[1] is None)
        self.assertTrue((np.array(['a', 'b']) == output[2]).all())

        data = {1: {'a': 42, 'b': 31}, 2: {'a': 24, 'c': 99},
                3: {'a': 2.4, 'b': 78}}
        output = ujson.loads(ujson.dumps(data), numpy=True, labelled=True)
        expectedvals = np.array(
            [42, 31, 24, 99, 2.4, 78], dtype=int).reshape((3, 2))
        self.assertTrue((expectedvals == output[0]).all())
        self.assertTrue((np.array(['1', '2', '3']) == output[1]).all())
        self.assertTrue((np.array(['a', 'b']) == output[2]).all())
def testArrayNumpyLabelled(self):
    """Decode with numpy=True, labelled=True (compat/u() variant).

    Output is a triple: (value array, row labels or None, column labels
    or None).
    """
    # Renamed 'input' -> 'data': the original shadowed the builtin.
    data = {'a': []}
    output = ujson.loads(ujson.dumps(data), numpy=True, labelled=True)
    self.assertTrue((np.empty((1, 0)) == output[0]).all())
    self.assertTrue((np.array(['a']) == output[1]).all())
    self.assertTrue(output[2] is None)

    data = [{'a': 42}]
    output = ujson.loads(ujson.dumps(data), numpy=True, labelled=True)
    self.assertTrue((np.array([42]) == output[0]).all())
    self.assertTrue(output[1] is None)
    self.assertTrue((np.array([u('a')]) == output[2]).all())

    # py3 is non-determinstic on the ordering......
    if not compat.PY3:
        data = [{'a': 42, 'b': 31}, {'a': 24, 'c': 99}, {'a': 2.4, 'b': 78}]
        output = ujson.loads(ujson.dumps(data), numpy=True, labelled=True)
        expectedvals = np.array(
            [42, 31, 24, 99, 2.4, 78], dtype=int).reshape((3, 2))
        self.assertTrue((expectedvals == output[0]).all())
        self.assertTrue(output[1] is None)
        self.assertTrue((np.array([u('a'), 'b']) == output[2]).all())

        data = {1: {'a': 42, 'b': 31}, 2: {'a': 24, 'c': 99},
                3: {'a': 2.4, 'b': 78}}
        output = ujson.loads(ujson.dumps(data), numpy=True, labelled=True)
        expectedvals = np.array(
            [42, 31, 24, 99, 2.4, 78], dtype=int).reshape((3, 2))
        self.assertTrue((expectedvals == output[0]).all())
        self.assertTrue((np.array(['1', '2', '3']) == output[1]).all())
        self.assertTrue((np.array(['a', 'b']) == output[2]).all())
def read_input_data_from_parse_file(file_name):
    """Read a UTF-8 file containing one JSON document and return it parsed.

    :param file_name: path to the parse file
    :return: Python object decoded from the file's JSON content
    """
    with codecs.open(file_name, mode='r', encoding='utf-8') as parse_file:
        json_str = parse_file.read().strip()
        # print(...) is valid in both Python 2 (parenthesized expression)
        # and Python 3; the original 'print json_str' was Python-2-only.
        print(json_str)
        en_parse_dict = json.loads(json_str)
        return en_parse_dict
def process_result_value(self, value, dialect):
    """Deserialize a DB column value (JSON text) into a variants dict.

    The decoded payload is merged over the ``self.variants`` defaults.
    A NULL/empty column or malformed JSON falls back to the defaults
    alone (same behavior as before, but only decode errors are caught
    now, so unrelated bugs are no longer silently swallowed).
    """
    try:
        data = json.loads(value)
    except (TypeError, ValueError):
        # TypeError: value is None/non-string; ValueError: bad JSON.
        data = {}  # taking care of empty data to default variants
    return dict(dict(self.variants.items()), **data)
def parseLisLinks(url):
    """Fetch a JSONP news-list URL and return details for each entry.

    The endpoint wraps its JSON as ``newsloaddrcallback(...);``.  The old
    code removed the wrapper with ``lstrip('newsloaddrcallback(')`` /
    ``strip(');')``, but those strip a *character set*, not a literal
    prefix/suffix, and only worked by accident.
    """
    newsdetails = []
    res = requests.get(url)
    text = res.text.strip()
    prefix = 'newsloaddrcallback('
    if text.startswith(prefix):
        text = text[len(prefix):]
    text = text.rstrip(');')  # drop the trailing ');' of the JSONP wrapper
    jd = json.loads(text)
    for ent in jd['result']['data']:
        newsdetails.append(getNewsDetail(ent))
    return newsdetails
# NOTE(review): this function is corrupted in the source -- the literal
# 'url = "http://*****:*****@URI'])' contains redacted credentials and the
# code between building `url` and the first `except` is missing, so the
# function cannot parse or run as-is.  It appears intended to annotate each
# article's text via the DBpedia Spotlight service and collect entity
# surface forms plus their abstracts -- recover the original from version
# control before attempting any fix.
def get_concepts(article): abstract_entity=[] name_entity=[] name_entity_string=[] abstract_entity_string=[] for i in range(0,len(article)): text= ' '.join(article[i]) url = 'http://model.dbpedia-spotlight.org/en/annotate' #url = "http://*****:*****@URI']) soup = BeautifulSoup(page,"html5lib") if j['Resources'][i]['@surfaceForm'] not in name_entity_string: #not consider duplicates name_entity_string.append(j['Resources'][i]['@surfaceForm']) abstract_entity_string.append(soup.find('p',{"class": "lead"}).string) except Exception as e: print("Error: ", e.args)#i've a problem with some word (decoding problem) I'll see later exit() name_entity.append(name_entity_string.copy()) abstract_entity.append(abstract_entity_string.copy()) abstract_entity_string.clear() name_entity_string.clear() else: name_entity.append([]) abstract_entity.append([]) return name_entity,abstract_entity
def show_windows(hWndList):
    """Scan window handles and copy labelled fund figures into SingleUserInfo.

    For every handle whose window title contains a known label, the text of
    the matching sibling control is stored through the matching setter.
    """
    label_setters = (
        ("资金余额", SingleUserInfo.set_capital_balance),
        ("总 资 产", SingleUserInfo.set_total_assets),
        ("股票市值", SingleUserInfo.set_stock_market_value),
        ("可取金额", SingleUserInfo.set_advisable_fundse),
        ("冻结金额", SingleUserInfo.set_frozen_fundse),
        ("可用金额", SingleUserInfo.set_available_funds),
    )
    for handle in hWndList:
        title = str(mUtil.getWindowText(handle))
        for label, setter in label_setters:
            if label in title:
                setter(find_text_for_index(hWndList, handle))
    # Strip embedded NUL characters that the Win32 text APIs leave in values.
    SingleUserInfo.__dict__ = json.loads(
        json.dumps(SingleUserInfo.__dict__).replace("\u0000", ""))
    log.log(json.dumps(SingleUserInfo.__dict__))
def getMixtapeList(self, response):
    """Parse one page of the mixtape (private course) listing, collecting ids.

    Keeps requesting the next page until ``paging.is_end`` is true, then
    issues one playlist request per collected id.
    """
    mixtapelist = json.loads(response.body)
    isEnd = mixtapelist.get('paging').get('is_end')
    # Translated: during testing only the first page was fetched.
    next_page = mixtapelist.get('paging').get('next')
    self.offset = next_page.split('=')[-1]
    datalist = mixtapelist['data']
    for data in datalist:
        mixtapeIds.append(data.get('id'))
        print "id = %s " % data.get('id')
    if not isEnd:
        # Next page starts right after everything collected so far;
        # this overwrites the offset parsed from 'next' above.
        self.offset = mixtapeIds.__len__()
        yield scrapy.FormRequest(
            url=self.start_urls[0],
            formdata={
                'limit': self.limit,
                'offset': str(self.offset)
            },
            method='GET',
            callback=self.getMixtapeList)
    else:
        # Translated: finished collecting the course id list.
        print '获取 私家课 列表 id 集合 结束,共有 %d 条' % mixtapeIds.__len__()
        for id in mixtapeIds:
            yield scrapy.FormRequest(
                url=playlist % id,
                method='GET',
                callback=self.parseMixtapeJson)
def get_rois_json_safe(self):
    """Return all ROIs from self.roi, blanking any that fail a JSON round-trip.

    Every ROI is test-serialized; on failure its heavyweight payload fields
    are cleared and the error recorded so loading can still proceed.
    """
    rois = []
    total = len(self.roi)
    for index, rv in enumerate(self.roi.values()):
        print 'Check {}/{} ROI...'.format(index + 1, total)
        try:
            json.loads(json.dumps(rv))  # verify
        except Exception as e:
            print 'ROI #{} has problem, "{}", SKIP LOADING!'.format(
                rv.id, e)
            # Keep the ROI but drop the unserializable payload fields,
            # recording the error for later inspection.
            rv.error = str(e)
            rv.blank = None
            rv.flicker = None
            rv.responses = {}
            rois.append(rv)
        else:
            rois.append(rv)
    return rois
def parse_start_url(self, response):
    """Yield (title, url) items scraped from a Toutiao JSON listing page."""
    print('----------爬取今日头条信息-----------')
    for entry in json.loads(response.text)['data']:
        title = entry['title']
        link = entry['source_url']
        # Relative links need the site prefix.
        if "http" not in link:
            link = "https://www.toutiao.com" + link
        yield Util._item(title, link, source)
def get_image_url(response1, number):
    """Fetch and return the first image URL of a gallery page.

    NOTE(review): the ``for`` loop returns on its first iteration, so only
    page 1 is ever fetched and ``number`` is never used -- this looks like a
    bug (the URLs of all pages were probably meant to be collected or
    yielded).  Confirm intent before changing.
    NOTE(review): ``json.loads(response1)`` implies response1 is a
    JSON-encoded URL string such as '"http://..."' -- verify against callers.
    """
    for i in range(1, 10):
        print(i)
        new_response = json.loads(response1) + '/' + str(i)
        print(new_response)
        result = get_one_page(new_response)
        pattern = re.compile('<img src="(.*?)" alt', re.S)
        items = re.search(pattern, result)
        print(u'获取到地址为:', items.group(1), u'的图片')
        return items.group(1)
def parseMixtapeJson(self, response):
    """Download every audio track of one course.

    The response body is JSON with a course ``title`` (used as the target
    directory) and a ``tracks`` list of audio entries.
    """
    data = json.loads(response.body)
    file_dir = data['title']
    tracks = data['tracks']
    print '总章节 %s ' % len(tracks)
    for index, track in enumerate(tracks):
        # Prefix the index so files sort in course order.
        file_name = '%s%s' % (index, track['title'])
        audio_url = track['audio']['url']
        print 'file name %s , audio url %s: ' % (file_name, audio_url)
        self.downloadAudio(file_dir, file_name, audio_url)
    pass
def get_allocated_vehicle(request):
    """Return the allocated vehicles of a transaction as JSON records.

    Expects a JSON request body containing ``transaction_id``.
    """
    payload = json.loads(request.body)
    txn = Transaction.objects.get(transaction_id=payload['transaction_id'])

    rows = []
    for allocation in txn.allocated_vehicle.all():
        vehicle = allocation.vehicle_number
        rows.append([
            vehicle.vehicle_type.vehicle_type + ", " + vehicle.vehicle_type.capacity,
            vehicle.vehicle_number,
            vehicle.driver.driving_licence_number,
            vehicle.driver.name,
            vehicle.driver.phone,
        ])

    frame = pd.DataFrame(rows, columns=[
        'vehicle_type', 'vehicle_number', 'driving_licence', 'driver_name',
        'driver_phone'
    ])
    # to_json -> loads keeps the historical record shape, including the
    # 'index' field added by reset_index().
    return json.loads(frame.reset_index().to_json(orient='records'))
def product_action(self, action): requestDto = TransactionDto().GetRequest( token=self.productScreen.parent.token, action=action, product_id=int(self.productScreen.id), buy_price=self.productScreen.buy_price.text.split(":")[1], sell_price=self.productScreen.sell_price.text.split(":")[1], product_quantity=self.productScreen.input_quantity.text) try: response = json.loads(self.connectionManager.send_request(body=requestDto.toJSON(), method='GET')) self.validate_response(response=response) self.show_product() except (httplib.HTTPException, socket.error) as ex: print 'products request error {0}'.format(ex)
def websocket_loop(ws, handler_map):
    """Dispatch incoming websocket JSON messages to handlers by 'type'.

    Each message is ``{"type": t, "data": payload}``; the handler
    registered for ``t`` is called with (payload, WebsocketSend(ws)).
    Runs until the connection ends.
    """
    # pandas.json was a private module removed from pandas long ago; the
    # stdlib json module provides the same loads() API.
    import json as ujson
    while True:
        msg = ws.receive()
        if msg is None:
            continue
        _log.debug('msg received %s', msg)
        data = ujson.loads(msg)
        t = data['type']
        payload = data['data']
        if t not in handler_map:
            _log.warning('no handler defined for message of type: %s', t)
        else:
            handler_map[t](payload, WebsocketSend(ws))
def read_status_for(id):
    """Query the device JSON API and map its status to an int.

    :param id: device id appended to the query URL
    :return: 1 for 'On', 0 for 'Off', None for any other status
             (previously the unknown case fell off the end implicitly).
    """
    # SECURITY(review): hard-coded Basic-auth credentials in source;
    # move them to configuration.
    headers = {'Authorization': 'Basic QXJhc2g6MTIzNDU='}
    request = urllib2.Request(
        "http://jebo.mynetgear.com:8080/json.htm?type=devices&rid=" + str(id),
        headers=headers)
    contents = urllib2.urlopen(request).read()
    status = json.loads(contents)["result"][0]['Status']
    if status == 'On':
        return 1
    elif status == 'Off':
        return 0
    return None  # unknown / unexpected status, now explicit
def get_diff_table(id1, id2, direction, ops, jsonit=True): """ Get cached data for the diff table and if not available calculate and cache it :param id1: :param id2: :param direction: :param ops: :param jsonit: If True return result as json string. Otherwise return Python object. :return: """ # HACK: force calculation of everything. then we only do it once and use the cache in the future all_ops = "structure,content,merge,reorder" hash_name = create_hashname(id1, id2, 0, 0, direction, all_ops) t1 = timeit.default_timer() json_result = get_diff_cache(hash_name) t2 = timeit.default_timer() _log.debug("TIMER: get diff: cache (json)", t2 - t1) diffobj = None if json_result is None: # get one for the detail t3 = timeit.default_timer() diffobj = calc_diff(id1, id2, direction, all_ops) t4 = timeit.default_timer() _log.debug("TIMER: get diff: calc diff ", t4 - t3) if isinstance(diffobj, Diff): # log the detail json_result = ujson.dumps(diffobj.serialize()) set_diff_cache(hash_name, json_result) else: # todo later find a way to send the error # e.g. there's no matching column in this case json_result = ujson.dumps(diffobj) # which is {} for now! set_diff_cache(hash_name, json_result) elif jsonit is False: diffobj = Diff().unserialize(ujson.loads(json_result)) if jsonit: return json_result else: return diffobj
def twitter_main(num):
    """
    use twitter api to gather data and conduct sentiment analysis
    :param num:number of tweets
    :return: [{name:{pos_perc:val},{neg_perc:val},{topic1:{pos_perc:val},{neg_perc:val}}}]
    """
    sia = SentimentIntensityAnalyzer()
    names = get_namelist()[:5]
    stopws = set(stopwords.words('english'))
    punct = set(string.punctuation)
    exclude = stopws.union(punct)
    for row in names:
        name = row['name']
        # SECURITY(review): hard-coded API credentials in source; move to config.
        auth = OAuth('862644832442789894-2vWBy8SIEUKLKQWRnTvSWO3gv0rsb9F',
                     '9PWQN0SMGxkgxmU8O0gP8c3EIelyHO7KyCPcodhRdc6DL'
                     , 'SKjZV8CKLNnXKCINixEd0ubTc',
                     'NKt17D4qjAVDd6E8oeD3TiT7FLjhZFaSqRS8ICcBfSGJUPpQLw')
        twitter_stream = TwitterStream(auth=auth)
        # iter = twitter_stream.statuses.sample()
        tweets_iter = twitter_stream.statuses.sample()
        count = 0
        word_dist = []
        sa_num = {'pos': 0, 'neu': 0, 'neg': 0}
        for tweet in tweets_iter:
            print (count)
            if count > num:
                break
            # NOTE(review): dumps-then-loads round-trip looks redundant;
            # presumably it normalizes the tweet object to plain dicts.
            tweet = json.dumps(tweet)
            tweet = json.loads(tweet)
            if 'text' in tweet:
                words = clean_twitter(tweet['text'], str(name), exclude)
                sa_result = sia.polarity_scores(tweet['text'])
                # Classify by whichever polarity score dominates.
                if max(sa_result['pos'], sa_result['neu'], sa_result['neg']) == sa_result['pos']:
                    sa_num['pos'] += 1
                elif max(sa_result['pos'], sa_result['neu'], sa_result['neg']) == sa_result['neg']:
                    sa_num['neg'] += 1
                else:
                    sa_num['neu'] += 1
                word_dist = word_dist + words
                count += 1
        # NOTE(review): under Python 2 these are integer divisions and would
        # truncate to 0 -- confirm this module runs on Python 3.
        sa_perc = {'pos': sa_num['pos'] / num, 'neu': sa_num['neu'] / num,
                   'neg': sa_num['neg'] / num}
        word_dist = FreqDist(word_dist)
        word_top5 = word_dist.most_common(5)
        mongo_dict = {'Name': name, 'Top5': word_top5, 'Sentiment': sa_perc}
        print (mongo_dict)
        save_in_mongo(mongo_dict, is_update=True)
def main():
    """Crawl mzitu listing pages and save every image of every gallery."""
    url = "http://www.mzitu.com/"
    response = get_one_page(url)
    items = parse_total_page(response)
    for item in items:
        # Gallery title (used as the directory name).
        path = get_image_name(item)
        # Number of images contained in this gallery.
        number = get_image_number(item)
        # NOTE(review): json.loads(item) below implies each item is a
        # JSON-encoded URL string ('"http://..."') -- confirm in
        # parse_total_page.
        make_dir(path)
        for i in range(1, int(number) + 1):
            new_response = json.loads(item) + '/' + str(i)
            result = get_one_page(new_response)
            pattern = re.compile('<img src="(.*?)" alt', re.S)
            # NOTE(review): 'items' is rebound here, clobbering the outer
            # list name (iteration still works on the original iterator).
            items = re.search(pattern, result)
            print(u'获取到地址为:', items.group(1), u'的图片')
            name = 'picture' + str(i) + '.jpg'
            save_image(items.group(1), path, name)
def JSONValidator(jstring):
    """
    Checks given jstring compatibilty to JSON format

    Parameters
    ----------
    jstring: string
        possible JSON string to be checked

    Returns
    -------
    json_data: JSON data as dict
        data extracted from JSON
        None if it is not detectable
    """
    # Catch only decode-related errors (bad JSON -> ValueError, non-string
    # input -> TypeError) instead of a blanket Exception, so genuine
    # programming errors still surface.
    try:
        json_data = loads(jstring)
    except (ValueError, TypeError):
        json_data = None
    return json_data
def extract_responses_from_tfm_db(self):
    """
    Wrapper adhoc - TFM suggestions - database -> tfm_mp_data
    Extract suggestions from database and export to excel
    """
    query = "SELECT data FROM encuestas"
    results = MysqlND.execute_query(query, ())
    self.survey_data = []
    for result in results:
        data = result[0]
        data = json.loads(data)
        response_values = {}
        # print(data)
        count_uxs = 0
        # Collect the 'ux_<n>' answers keyed by their question number.
        # NOTE(review): iteritems()/keys().sort() below are Python-2-only.
        for key, value in data.iteritems():
            if 'ux_' in key:
                count_uxs += 1
                number_survey = int(key.replace("ux_", ""))
                response_values[number_survey] = int(value)
        # response_values = sorted(response_values.items())
        print("Total = " + str(count_uxs))
        response_values_keys = response_values.keys()
        response_values_keys.sort()
        final_response_values = []
        # NOTE(review): 'count' is incremented but never read -- dead code.
        count = 1
        # for key in response_values_keys:
        for key in range(1, 27):
            # print "%s: %s" % (key, response_values[key])
            if key in response_values_keys:
                # if count == key:
                final_response_values.append(response_values[key])
            else:
                # Questions without an answer default to the neutral value 3.
                final_response_values.append(3)
                count += 1
            count += 1
        self.survey_data.append(final_response_values)
def update_product(self):
    """Fetch the current product record and refresh the product screen.

    Network failures are reported but not re-raised.
    """
    requestDto = ProductDto().GetRequest(
        token=self.productScreen.parent.token,
        product_id=int(self.productScreen.id))
    try:
        # The API returns {'data': {'product_list': [record, ...]}}.
        response = json.loads(self.connectionManager.send_request(
            body=requestDto.toJSON(), method='GET'))['data']['product_list'][0]
        self.productScreen.product_name.text = response['product_name']
        self.productScreen.product_description.text = response['product_description']
        # Prices are rendered with three decimal places.
        self.productScreen.buy_price.text = 'Buy price: {0}'.format(
            format(float(response['buy_price']), '.3f')
        )
        self.productScreen.sell_price.text = 'Sell price: {0}'.format(
            format(float(response['sell_price']), '.3f')
        )
        self.productScreen.current_quantity.text = 'Quantity: {0}'.format(
            int(response['product_quantity'])
        )
        self.show_product()
    except (httplib.HTTPException, socket.error) as ex:
        print 'products request error {0}'.format(ex)
def initUserInfo(self):
    """Populate SingleUserInfo fund figures from the trading window."""
    log.log("===================================================")
    log.log("开始填充用户数据.......")
    log.log("===================================================")
    # Bring the order window to the foreground, then refresh it (F2/F4).
    win32gui.SetForegroundWindow(self.xiadanH)
    self.clickBroadF2()
    self.clickBroadF4()
    childWindows = SingleUtil.findChildWindows(self.xiadanH)
    label_setters = (
        ("资金余额", SingleUserInfo.set_capital_balance),
        ("总 资 产", SingleUserInfo.set_total_assets),
        ("股票市值", SingleUserInfo.set_stock_market_value),
        ("可取金额", SingleUserInfo.set_advisable_fundse),
        ("冻结金额", SingleUserInfo.set_frozen_fundse),
        ("可用金额", SingleUserInfo.set_available_funds),
    )
    for childHw in childWindows:
        windowTitle = str(SingleUtil.getWindowText(childHw))
        for label, setter in label_setters:
            if label in windowTitle:
                setter(self.find_text_for_index(childWindows, childHw))
    # Strip embedded NUL characters left by the Win32 text APIs.
    SingleUserInfo.__dict__ = json.loads(
        json.dumps(SingleUserInfo.__dict__).replace("\u0000", ""))
    log.log("===================================================")
    log.log("用户信息资金信息:")
    log.log(json.dumps(SingleUserInfo.__dict__))
    log.log("===================================================")
def on_message(self, message): rv, err = None, None try: seq, ftype, route, payload = json.loads(message) as_binary = payload.pop('as_binary') func = getattr(self, ftype) with print_captured(self): rv = func(route, **payload) except Exception as e: info = sys.exc_info() source = traceback.format_exception(*info) print '\n======== exception on websocket ========' traceback.print_exception(*info) print '======== exception on websocket ========\n' err = dict(title=e.__class__.__name__, detail=str(e), source=source) if as_binary and rv is not None: # two uint32 for seq and error, 8bytes in total # in network byte order (big endian) meta = struct.pack('!II', seq, 0) # 0 for err (temporary) self.write_message(meta + rv, binary=True) else: self.dump_message(seq, rv, err)
def parse_line(self, line, encoding):
    """Decode one line of line-delimited JSON, wrapping decode failures."""
    try:
        decoded = line.decode(encoding)
        return json.loads(decoded)
    except ValueError as exc:
        message = 'Line delimited JSON parse error - %s' % six.text_type(exc)
        raise ParseError(message)
def read_relations_from_pdtb_file(file_name):
    """Parse a PDTB-style file (one JSON relation per line) into a list."""
    relations = []
    with codecs.open(file_name, mode='r', encoding='utf-8') as pdtb_file:
        for line in pdtb_file:
            relations.append(json.loads(line))
    return relations
def get_image_number(response1):
    """Return the image count scraped from the page's <span> elements.

    ``response1`` is a JSON-encoded URL string.
    """
    page = get_one_page(json.loads(response1))
    spans = re.findall(re.compile('<span>(.*?)</span>', re.S), page)
    number = spans[8]  # the 9th <span> holds the gallery's image count
    print(u'此图片数量为:', number)
    return number
def get_image_name(response1):
    """Return the gallery title scraped from the page's <h2> heading.

    ``response1`` is a JSON-encoded URL string.
    """
    page = get_one_page(json.loads(response1))
    match = re.search('<h2 class="main-title">(.*?)</h2>', page, re.S)
    name = match.group(1)
    print(u'图片名称为:', name)
    return name
def test_decodeFloatingPointAdditionalTests(self):
    """Round-trip a spread of float literals through ujson.loads.

    Data-driven rewrite of 18 copy-pasted assertions; also replaces the
    deprecated assertAlmostEquals alias with assertAlmostEqual.
    """
    places = 15
    literals = [
        "-1.1234567893", "-1.234567893", "-1.34567893", "-1.4567893",
        "-1.567893", "-1.67893", "-1.7893", "-1.893", "-1.3",
        "1.1234567893", "1.234567893", "1.34567893", "1.4567893",
        "1.567893", "1.67893", "1.7893", "1.893", "1.3",
    ]
    for literal in literals:
        self.assertAlmostEqual(float(literal), ujson.loads(literal),
                               places=places)
# Demo script: build a random OHLC frame, plot it in a ts_charting Lab,
# then serialize the lab to JSON and decode it back as a round-trip check.
import numpy as np
import pandas as pd
# NOTE(review): pandas.json was a private module removed from modern
# pandas -- this import only works on old pandas versions.
from pandas import json
from ts_charting.json import to_json
import ts_charting.lab.lab as tslab

# 10k business days of random OHLC data.
plot_index = pd.date_range(start="2000-1-1", freq="B", periods=10000)
df = pd.DataFrame(index=plot_index)
df['open'] = np.random.randn(len(plot_index))
df['high'] = np.random.randn(len(plot_index))
df['low'] = np.random.randn(len(plot_index))
df['close'] = np.random.randn(len(plot_index))

lab = tslab.Lab()
fig = lab.station('candle')
df.tail(5).ohlc_plot()
# Mark bars where the high exceeded the previous bar's high.
fig.plot_markers('high', df.high > df.high.shift(1), yvalues=df.open)

jd = to_json(lab)
obj = json.loads(jd)
def test_decodeFloatingPointAdditionalTests(self):
    """Exact-equality decode checks for a spread of float literals.

    Data-driven rewrite of 18 copy-pasted assertions; also replaces the
    deprecated assertEquals alias with assertEqual.
    """
    literals = [
        "-1.1234567893", "-1.234567893", "-1.34567893", "-1.4567893",
        "-1.567893", "-1.67893", "-1.7893", "-1.893", "-1.3",
        "1.1234567893", "1.234567893", "1.34567893", "1.4567893",
        "1.567893", "1.67893", "1.7893", "1.893", "1.3",
    ]
    for literal in literals:
        self.assertEqual(float(literal), ujson.loads(literal))
def getCommentCounts(newsurl):
    """Return the total comment count for a Sina news article URL.

    The comment endpoint responds with ``var data={...}``; the JS prefix
    is removed before JSON decoding.  The old code used
    ``str.strip('var data=')``, which strips a *character set* from both
    ends rather than the literal prefix, and only worked by accident.
    """
    m = re.search('doc-i(.+).shtml', newsurl)
    newsid = m.group(1)
    comments = requests.get(commentURL.format(newsid))
    text = comments.text.strip()
    prefix = 'var data='
    if text.startswith(prefix):
        text = text[len(prefix):]
    jd = json.loads(text)
    return jd['result']['count']['total']
def parseState(result):
    """Decode and return the 'json' payload carried by *result*."""
    raw = result['json']
    return json.loads(raw)
dt # 时间转字符串 - strftime dt.strftime('%Y-%m-%d') # 将每一段落加到list中 article = [] for p in soup.select('#artibody p')[:-1]: article.append(p.text.strip()) ' '.join(article) ' '.join([p.text.strip() for p in soup.select('#artibody p')[:-1]]) # 取得编辑名称 editor = soup.select('.article-editor')[0].text.lstrip('责任编辑:') # 取得评论数 soup.select('#commentCountl') jd = json.loads(comments.text.strip('var data=')) jd['result']['count']['total'] # 如何取得新闻编号 newsurl = 'http://news.sina.com.cn/o/2017-09-26/doc-ifymenmt7129299.shtml' newsid = newsurl.split('/')[-1].rstrip('.shtml').lstrip('doc-i') newsid import re m = re.search(r'doc-i(.*).shtml', newsurl) print(m.group(1)) # 将抓取评论数的方法整理成一函式 commentURL = ''
# read twitter API api_type = "s" # s: Standard, p30: Premium 30 days, pf: Premium Full Archive if len(sys.argv) > 1: option = sys.argv[1] if option in ['s', 'p30', 'pf']: api_type = option else: raise ValueError("First argument should be API type: s or p30 or pf") if api_type == 's': # ##### Standard API ##### data = utils.search_tweets_standard_api(query=config_parser.get('tweets', 'query'), oauth=oauth) elif api_type == 'p30': # ##### Premium API 30 days ##### data = utils.search_tweets_premium_api(json_payload=json.loads(config_parser.get('tweets', 'json_payload')), api=utils.TweeterPremiumAPI.day_30, oauth=oauth) elif api_type == 'pf': # ##### Premium API Full Archive ##### data = utils.search_tweets_premium_api(json_payload=json.loads(config_parser.get('tweets', 'json_payload')), api=utils.TweeterPremiumAPI.full_archive, oauth=oauth) else: assert False, 'First argument should be API type: s or p30 or pf' # ##### Get tweets by user id ##### # data = utils.get_tweets_by_user_id("1219328588", oauth) # Mongo Client mongo_client = MongoClient(host=config_parser.get('mongo', 'host'),