def main():
    # Save station-to-station (s2s) records for every pair of stops on each train
    temp_trains = ['C6903', 'C6905', 'C6907', 'C6909', 'C6911', 'C6913', 'C6915', 'C6917',
                   'C6919', 'C6921', 'C6923', 'C6925', 'C6953', 'C6955', 'C6957', 'C6959',
                   'C6961', 'C6963', 'C6965', 'C6967', 'C6969', 'C6971', 'C6973', 'C6975',
                   'C6902', 'C6904', 'C6906', 'C6908', 'C6910', 'C6912', 'C6918', 'C6920',
                   'C6922', 'C6924', 'C6926', 'C6952', 'C6954', 'C6956', 'C6958', 'C6960',
                   'C6962', 'C6966', 'C6968', 'C6970', 'C6972', 'C6974', 'C6976', 'C6929',
                   'C6914', 'C6928', 'C6930', 'C6978', 'C6964', 'C6980', 'C6977', 'C6979',
                   'C6981']
    for code in temp_trains:
        names = DataBaseUtil.select(
            "select name from train_line_stop where train_code = '%s' order by sequence" % code)
        if len(names) > 0:
            key = 0
            name_list = {}
            for n in names:
                name_list[key] = n[0]
                key = key + 1
            kvs = name_list.items()
            i = -1
            for ki, vi in kvs:
                i += 1
                j = -1
                for kj, vj in kvs:
                    j += 1
                    if i == j:
                        continue
                    start_station = vi
                    end_station = vj
                    try:
                        # prefer the stored alias; fall back to a pinyin transliteration
                        start_py = DataService.get_alia_by_station(start_station)
                        if start_py == '':
                            start_py = py_util.hanzi2pinyin_split(
                                string=start_station, split="", firstcode=False)
                        end_py = DataService.get_alia_by_station(end_station)
                        if end_py == '':
                            end_py = py_util.hanzi2pinyin_split(
                                string=end_station, split="", firstcode=False)
                        DataService.save_s2s(start_station, start_py, end_station, end_py)
                    except:
                        t, v, tb = sys.exc_info()
                        log.error("%s,%s,%s" % (t, v, traceback.format_tb(tb)))
def register_topic(topic):
    request_json = request.get_json()
    agent_ip = request_json['agent_ip']
    agent_name = request_json['agent_name']
    agent = Agent("", topic, agent_name, agent_ip)
    dataService = DataService(agent, "", "")
    return dataService.insertAgent()
class DataServiceTest(unittest.TestCase):

    def __init__(self, methodName):
        super().__init__(methodName)
        self.dao = None      # DOC
        self.service = None  # SUT

    def test_csv_data(self):
        # Setup
        self.dao = Mock()  # Mock() replaces DataAccessObject
        self.dao.read_data.return_value = [0.8273, 0.7822, 0.9731, 0.1239, 0.9898]
        self.service = DataService(self.dao)
        # Exercise
        values = self.service.csv_data()
        # State verification
        expected = "0.8273,0.7822,0.9731,0.1239,0.9898"
        self.assertEqual(expected, values)
        # Behavioral verification
        self.dao.read_data.assert_called_once()
        self.assertEqual(1, self.dao.read_data.call_count)

    def test_data_access_error(self):
        # Setup
        self.dao = Mock()  # Mock() replaces DataAccessObject
        self.dao.read_data.side_effect = DataAccessError('Can not read data!')
        self.service = DataService(self.dao)
        # Exercise
        with self.assertRaises(ServiceError):
            self.service.csv_data()
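# For context, a minimal sketch of the DataService and exception types these tests
# assume. The names DataAccessError, ServiceError, and read_data come from the test
# itself; the implementation below is only one plausible version, not the project's own.
class DataAccessError(Exception):
    """Raised by the data access layer when the underlying source cannot be read."""


class ServiceError(Exception):
    """Raised by the service layer to hide data access details from callers."""


class DataService:
    def __init__(self, dao):
        self.dao = dao  # the DataAccessObject (or a Mock standing in for it)

    def csv_data(self):
        """Join the DAO's values into a comma-separated string."""
        try:
            return ",".join(str(value) for value in self.dao.read_data())
        except DataAccessError as error:
            raise ServiceError(str(error))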
def parse_content(self, content, link_job):
    json_obj = json.loads(content)
    shop_list = json_obj['list']
    for shop in shop_list:
        try:
            category_id = shop['categoryId']
            city_id = shop['cityId']
            shop_id = shop['id']
            url = self.shop_url_pattern % shop_id
            DataService.save_dp_shop(url, shop_id, city_id, category_id)
        except:
            t, v, tb = sys.exc_info()
            log.error("%s,%s,%s" % (t, v, traceback.format_tb(tb)))
def data_service_handler():
    """Get or create a DataService instance for a tenant."""
    tenant = tenant_handler.tenant()
    handler = tenant_handler.handler('data', 'data', tenant)
    if handler is None:
        handler = tenant_handler.register_handler(
            'data', tenant, DataService(tenant, app.logger))
    return handler
def sample_test():
    data, char_to_ix, ix_to_char = DataService.get_data()
    chars = list(set(data))
    data_size, vocab_size = len(data), len(chars)

    np.random.seed(2)
    _, n_a = 20, 100
    Wax, Waa, Wya = (np.random.randn(n_a, vocab_size),
                     np.random.randn(n_a, n_a),
                     np.random.randn(vocab_size, n_a))
    b, by = np.random.randn(n_a, 1), np.random.randn(vocab_size, 1)
    parameters = {"Wax": Wax, "Waa": Waa, "Wya": Wya, "b": b, "by": by}

    indices = ModelService.sample(parameters, char_to_ix, 0)
    print("Sampling:")
    print("list of sampled indices:\n", indices)
    print("list of sampled characters:\n", [ix_to_char[i] for i in indices])
api = Api(
    app,
    version='1.0',
    title='Data service API',
    description='API for QWC Data service',
    default_label='Data edit operations',
    doc='/api/',
)
# disable verbose 404 error message
app.config['ERROR_404_HELP'] = False

# Setup the Flask-JWT-Extended extension
jwt = jwt_manager(app, api)

# create data service
data_service = DataService(app.logger)

# Api models
geojson_crs_properties = create_model(api, 'CRS Properties', [
    [
        'name',
        fields.String(required=True, description='OGC CRS URN',
                      example='urn:ogc:def:crs:EPSG::3857')
    ],
])

geojson_crs = create_model(api, 'CRS', [
    [
        'type',
        fields.String(required=True, description='CRS type',
                      example='name')
    ],
])
def parse_content(self, content, link_job):
    json_obj = json.loads(content)
    data = json_obj['data']['data']
    typeName = data[0]['train_class_name']
    start_time = data[0]['start_time']
    pre_time = start_time
    duration = 0
    name_list = {}
    days = 0
    train_no = link_job.task.train_no
    # Save per-station stop data
    for i, d in enumerate(data):
        arrive_time = d['arrive_time']
        depart_time = d['start_time']
        if i == 0:
            arrive_time = 'null'
        if i != 0:
            if compareSS(pre_time, arrive_time) < 0:
                days += 1
            duration = duration + getDaysSS(start_time, d['arrive_time'], days)
            pre_time = arrive_time
            if int(d['station_no']) == 1:
                raise Exception
        stayTime = 0
        if i != 0 and i != len(data) - 1:
            stayTime = d['stopover_time']
            try:
                # strip the trailing '分' (minutes) suffix from the stopover time
                stayTime = stayTime[0:str(stayTime).index('分')]
            except Exception as e:
                stayTime = 0
                # raise e
        if i == len(data) - 1:
            depart_time = 'null'
        try:
            train_stop = TrainStop(link_job.task.train_code, d['station_name'], int(d['station_no']),
                                   arrive_time, stayTime, days, duration, depart_time, typeName, train_no)
            DataService.save_train_stop(train_stop)
            log.info((link_job.task.train_code, d['station_name'], int(d['station_no']),
                      arrive_time, stayTime, days, duration, depart_time, typeName, train_no))
            name_list.update({d['station_no']: d['station_name']})
        except:
            t, v, tb = sys.exc_info()
            log.error("%s,%s,%s" % (t, v, traceback.format_tb(tb)))

    # Save station-to-station (s2s) records for every pair of stops
    kvs = name_list.items()
    i = -1
    for ki, vi in kvs:
        i += 1
        j = -1
        for kj, vj in kvs:
            j += 1
            if i == j:
                continue
            start_station = vi
            end_station = vj
            try:
                start_py = DataService.get_alia_by_station(start_station)
                if start_py == '':
                    start_py = py_util.hanzi2pinyin_split(
                        string=start_station, split="", firstcode=False)
                end_py = DataService.get_alia_by_station(end_station)
                if end_py == '':
                    end_py = py_util.hanzi2pinyin_split(
                        string=end_station, split="", firstcode=False)
                DataService.save_s2s(start_station, start_py, end_station, end_py)
            except:
                t, v, tb = sys.exc_info()
                log.error("%s,%s,%s" % (t, v, traceback.format_tb(tb)))
from data_service import DataService
from model_service import ModelService

data, char_to_ix, ix_to_char = DataService.get_data()
parameters = ModelService.model(data, ix_to_char, char_to_ix, verbose=True)
from environment import Environment
from data_service import DataService
import threading
import time

'''the application iterates at a frequency defined at runtime,
triggering the data-service task on each iteration'''


def iterate():
    '''calls the task method on the data service, and requeues itself in a thread'''
    timeout = time.time() + freq
    ds.task()
    print('completed.. waiting {} seconds before next run'.format(
        int(round(timeout - time.time()))))
    threading.Timer(timeout - time.time(), iterate).start()


ds = DataService()
freq = Environment.get_frequency()
iterate()
def getTopic(gitUrl):
    ds = DataService()
    result = ds.get_topic(gitUrl)
    print('**********')
    print(result)
    return result
#!/usr/bin/env python3
from config import Config
from data_service import DataService
from parser import run

import logging

logging.basicConfig(level=logging.INFO, filename='parse.log')

if __name__ == '__main__':
    config = Config()
    data_service = DataService(
        config.get('db_connection_url'),
        config.get('db_name'),
    )
    run(config.get('start_url'), data_service)
def main():
    data_service = DataService()
    news_sources = data_service.refresh_and_return_news_sources()
    source_articles = {}
    for news_source in news_sources:
        articles = data_service.pull_articles_from_source(news_source['url'])
        source_articles[news_source['_id']] = articles

    if 'docs' in source_articles.keys():
        for article in source_articles['docs']['source']['enriched']['url']:
            pass

    stop_here = ""
    stop_here = ""

    query_url = 'https://topics.mediacloud.org/api/collections/9139458/stories'
    parameters = {
        'snapshotId': '1477',
        'timespandId': '39849',
        'sort': 'inlink',
        'limit': '5000'
    }
    # response = requests.get(query_url, params=parameters)
    # stop_here = ""

    client = MongoClient('localhost', 27017)
    database = client['AkashicRecords']
    articles = database['historic_political_article_data']
    cbt = database['cleaned_breitbart_test']

    articles_found = []
    for article in articles.find():
        articles_found.append(article)

    cleaned_articles = []
    for article in articles_found:
        if len(article['entities']) > 5:
            article['entities'] = article['entities'][:5]

    stop_here = ""

    for article in articles_found:
        relation_by_sent_id = {}
        for relation in article['relations']:
            try:
                sent_id = hash(relation['sentence'])
                if 'subject' in relation.keys():
                    if 'object' in relation.keys():
                        pass
                if 'object' in relation.keys():
                    pass
            except Exception as e:
                print(e.args)

    # watson_service = WatsonService()
    # articles_found = data_service.pull_from_source('http://www.breitbart.com')
    cleaned_articles = data_service.clean_source_articles(
        'breitbart.com', articles_found)
    cbt.insert_many(cleaned_articles)

    article_data_list = []
    for article in articles.find():
        article_element_types = []
        relevance_threshold = 0.80
        for entity in article['entities']:
            if entity['relevance'] > relevance_threshold:
                if 'knowledgeGraph' in entity.keys():
                    if 'typeHierarchy' in entity['knowledgeGraph'].keys():
                        article_element_types.append(
                            entity['knowledgeGraph']['typeHierarchy'].split('/')[1:])
        for keyword in article['keywords']:
            if keyword['relevance'] > relevance_threshold:
                if 'knowledgeGraph' in keyword.keys():
                    if 'typeHierarchy' in keyword['knowledgeGraph'].keys():
                        article_element_types.append(
                            keyword['knowledgeGraph']['typeHierarchy'].split('/')[1:])
        article_data_list.append(article_element_types)

    stop_here = ""
def parse_content(self, content, link_job):
    # strip the JSONP wrapper: keep the text between the outermost parentheses
    content = content[content.index('(') + 1:content.rindex(')')]
    json_obj = json.loads(content)
    trains = json_obj['data']['trains']
    for train in trains:
        try:
            train_code = train['trainNum']
            start_station = train['fromCity']
            end_station = train['toCity']
            origin = train['beginPlace']
            terminal = train['endPlace']
            depart_time = train['fromTime']
            arrive_time = train['toTime']
            duration = int(train['usedTimeInt']) * 60
            note = train['note']
            A1 = get_price('hardseat', train['ticketState'])
            A2 = get_price('softseat', train['ticketState'])
            A3 = get_price('hardsleepermid', train['ticketState'])
            A4 = get_price('softsleeperdown', train['ticketState'])
            A6 = get_price('advancedsoftsleeper', train['ticketState'])
            A9 = get_price('businessseat', train['ticketState'])
            O = get_price('secondseat', train['ticketState'])
            M = get_price('firstseat', train['ticketState'])
            P = get_price('specialseat', train['ticketState'])
            sequence = 0
            days = 0
            stayTime = 0
            grade = ''
            state = 0
            train_no = ''
            exist = DataService.check_traincode_exist(train_code)
            # is_correct -- 0: bad ticket data, 1: correct
            is_correct = 1
            if len(train['ticketState']) == 0:
                is_correct = 0
            # state -- 0: normal, 1: train code not recorded,
            #          2: train code recorded but this stop has been cancelled
            if not exist:
                state = 1
            else:
                station_s = DataService.find_station(train_code, start_station)
                station_e = DataService.find_station(train_code, end_station)
                if len(station_e) != 0 and len(station_s) != 0:
                    days_s = int(station_s[0][6])
                    if date_util.compareSS(station_s[0][4], station_s[0][8]) < 0:
                        days_s += 1
                    sequence = station_e[0][3]
                    days = station_e[0][6] - days_s
                    stayTime = station_e[0][5]
                    grade = station_e[0][9]
                    train_no = station_e[0][10]
                else:
                    state = 2
            price = Price(train_code, end_station, start_station, depart_time, arrive_time,
                          duration, A1, A2, A3, A4, O, M, A6, A9, grade, days, P, origin,
                          terminal, sequence, train_no, stayTime, is_correct, state, note)
            DataService.save_train_price(price)
        except:
            t, v, tb = sys.exc_info()
            log.error("%s,%s,%s" % (t, v, traceback.format_tb(tb)))
from data_service import DataService
from plot_drawing import PlotDrawingService

ds = DataService("1 1 2\n2 2 3\n3 3 4\n4 4 5")
x, ys = ds.columns(0, [1, 2])
PlotDrawingService().draw_plot(x, ys, "time", "mass", "plot1.png")
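# A minimal sketch of the DataService assumed by the snippet above, illustrating the
# column-splitting behaviour implied by ds.columns(0, [1, 2]); this is an assumption
# for illustration only, not the project's actual implementation.
class DataService:
    def __init__(self, raw_text):
        # parse whitespace-separated rows of numbers into lists of floats
        self.rows = [[float(v) for v in line.split()]
                     for line in raw_text.splitlines() if line.strip()]

    def columns(self, x_index, y_indices):
        """Return the x column and the list of y columns selected by index."""
        x = [row[x_index] for row in self.rows]
        ys = [[row[i] for row in self.rows] for i in y_indices]
        return x, ys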
import logging

from constants import LOG_FILENAME
from data_service import DataService

logging.basicConfig(filename=LOG_FILENAME, level=logging.INFO)

data_service = DataService()
# data_service.execute_scout_data_generation_cycle(page_number=6014)
data_service.copy_geo_hash()
def main():
    data_service = DataService()
    news_journals = data_service.get_known_online_news_journals()
    news_journal_articles = []
    for news_journal in news_journals:
        try:
            articles_of_journal_over_timeframe = data_service.get_articles_by_new_journal_over_timeframe(
                news_journal['url'])
        except Exception as query_failure:
            print('source name, source id, resulted in x-many articles or none at all within the timeframe')

    source_articles_by_source_id = {}
    for news_source in news_sources:
        articles = data_service.get_articles_from_online_news_journal_by_url(
            news_source['url'])
        source_articles_by_source_id[news_source['_id']] = articles

    if 'docs' in source_articles_by_source_id.keys():
        for article in source_articles_by_source_id['docs']['source']['enriched']['url']:
            pass

    # Find the best possible news across one or more sources: stories that are both
    # singularly about a topic and carry an interesting or strong perspective.
    # We want news that is unique due to its pairing of strange characters doing odd
    # things, or that expresses a strong and unique opinion towards common actions
    # performed by a character in a novel way.
    # Hierarchy-of-boxes model, where the color of the lines of each rectangle can
    # convey different information, same as the spacing between the rectangles.

    stop_here = ""
    stop_here = ""

    query_url = 'https://topics.mediacloud.org/api/collections/9139458/stories'
    parameters = {
        'snapshotId': '1477',
        'timespandId': '39849',
        'sort': 'inlink',
        'limit': '5000'
    }
    # response = requests.get(query_url, params=parameters)
    # stop_here = ""

    client = MongoClient('localhost', 27017)
    database = client['AkashicRecords']
    articles = database['historic_political_article_data']
    cbt = database['cleaned_breitbart_test']

    articles_found = []
    for article in articles.find():
        articles_found.append(article)

    cleaned_articles = []
    for article in articles_found:
        if len(article['entities']) > 5:
            article['entities'] = article['entities'][:5]

    stop_here = ""

    for article in articles_found:
        relation_by_sent_id = {}
        for relation in article['relations']:
            try:
                sent_id = hash(relation['sentence'])
                if 'subject' in relation.keys():
                    if 'object' in relation.keys():
                        pass
                if 'object' in relation.keys():
                    pass
            except Exception as e:
                print(e.args)

    # watson_service = WatsonService()
    # articles_found = data_service.pull_from_source('http://www.breitbart.com')
    cleaned_articles = data_service.clean_source_articles(
        'breitbart.com', articles_found)
    cbt.insert_many(cleaned_articles)

    article_data_list = []
    for article in articles.find():
        article_element_types = []
        relevance_threshold = 0.80
        for entity in article['entities']:
            if entity['relevance'] > relevance_threshold:
                if 'knowledgeGraph' in entity.keys():
                    if 'typeHierarchy' in entity['knowledgeGraph'].keys():
                        article_element_types.append(
                            entity['knowledgeGraph']['typeHierarchy'].split('/')[1:])
        for keyword in article['keywords']:
            if keyword['relevance'] > relevance_threshold:
                if 'knowledgeGraph' in keyword.keys():
                    if 'typeHierarchy' in keyword['knowledgeGraph'].keys():
                        article_element_types.append(
                            keyword['knowledgeGraph']['typeHierarchy'].split('/')[1:])
        article_data_list.append(article_element_types)

    stop_here = ""