def prepare_education_train_data(self, cv_data):
    """Build NER training tuples from the education section of a parsed CV.

    For each degree, the degree's free-text comment becomes the training
    text, and the degree name, school name, and attendance dates are
    labeled inside it via ``label_entity``.

    Returns a list of ``(text, {'entities': [...]})`` tuples.
    """
    schools = cv_data.get('education', {}).get('SchoolOrInstitution', [])
    samples = []
    for school in schools:
        institutions = school.get('School', [])
        school_title = institutions[0].get('SchoolName', '') if institutions else ''
        for degree in school.get('Degree', []):
            # Collapse all whitespace runs to single spaces so character
            # offsets computed by label_entity stay stable.
            text = re.sub(r'[\s]', ' ', degree.get('Comments', ''))
            attendance = degree.get('DatesOfAttendance', [])
            period = attendance[0] if attendance else {}
            begin, finish = get_dates(period)
            spans = []
            label_entity(text, spans, degree.get('DegreeName', ''), 'DEGREE')
            label_entity(text, spans, school_title, 'ORG')
            label_entity(text, spans, begin, 'START_DATE')
            label_entity(text, spans, finish, 'END_DATE')
            samples.append((text, {'entities': spans}))
    return samples
def analyze_enis(account_id, bucket, prefix, enis, ips, start, end, store_dir,
                 ipdb=None, cmdb=None, ipranges=None, region=None, reject=None,
                 targets=None, ports=None, tz=None, sample_count=20):
    """Aggregate VPC flow-log traffic for a set of ENIs and print top talkers.

    Downloads each ENI's flow logs from S3, sums inbound/outbound byte
    counters across all ENIs, resolves the IPs, and prints the
    ``sample_count`` most common sources and destinations.
    """
    logging.basicConfig(level=logging.INFO)
    logging.getLogger('botocore').setLevel(logging.WARNING)
    # BUG FIX: map() returns a one-shot iterator on Python 3; it would be
    # exhausted after the first ENI's eni_log_analyze call, silently
    # dropping the port filter for every subsequent ENI. Materialize it.
    ports = list(map(int, ports))
    start, end = get_dates(start, end, tz)
    client = boto3.client('s3')
    log_prefix = "%s/%s/flow-log/%s/%s" % (
        prefix.rstrip('/'), account_id,
        start.strftime('%Y/%m/%d'),
        "00000000-0000-0000-0000-000000000000")
    resolver = IPResolver(ipdb, cmdb, ipranges)
    agg_in_traffic = Counter()
    agg_out_traffic = Counter()
    agg_inport_traffic = Counter()
    agg_outport_traffic = Counter()
    for eni, ip in zip(enis, ips):
        files = eni_download_flows(
            client, bucket, log_prefix, start, end, eni, store_dir)
        in_traffic, out_traffic, inport_traffic, outport_traffic = eni_log_analyze(
            set(ips), eni_flow_stream(files, start, end),
            start=start, end=end, reject=reject,
            target_ips=targets, ports=ports)
        agg_in_traffic.update(in_traffic)
        agg_out_traffic.update(out_traffic)
        agg_inport_traffic.update(inport_traffic)
        agg_outport_traffic.update(outport_traffic)
    # BUG FIX: "Commmon" -> "Common", consistent with the Outbound header.
    print("Inbound %d Most Common" % sample_count)
    for ip, bcount in resolve_ip_address(
            agg_in_traffic, resolver, start, end).most_common(sample_count):
        # NOTE(review): "%s %s" % ip assumes each counter key is a 2-tuple
        # (e.g. address + resolved name) — confirm resolve_ip_address's contract.
        print("%s %s" % ip, human_size(bcount))
    print("Outbound %d Most Common" % sample_count)
    for ip, bcount in resolve_ip_address(
            agg_out_traffic, resolver, start, end).most_common(sample_count):
        print("%s %s" % ip, human_size(bcount))
def write(self):
    """Dump simulation output to two timestamped CSV files under ./results:
    one with simulated arrival times per grid cell, one with the score of
    each archaeological date against the simulation."""
    stamp = int(time())
    sim_path = './results/sim{}.csv'.format(str(stamp))
    with open(sim_path, 'w') as out:
        out.write('x,y,bp\n')
        for cell in self.grid:
            arrival = self.grid[cell]['arrival_time']
            if not arrival:
                continue
            lon, lat = to_lonlat(transform_coords(cell))
            out.write(str(lon) + ',' + str(lat) + ',' + str(arrival) + '\n')
    dates_path = './results/dates{}.csv'.format(str(stamp))
    dates = get_dates()
    with open(dates_path, 'w') as out:
        out.write('x,y,score\n')
        for cell in dates:
            simulated = self.grid[cell]['arrival_time']
            if simulated in dates[cell]:
                # Score is the date's probability relative to the modal date.
                score = dates[cell][simulated] / max(dates[cell].values())
            else:
                score = 0
            lon, lat = to_lonlat(transform_coords(cell))
            out.write(str(lon) + ',' + str(lat) + ',' + str(score) + '\n')
def prepare_employment_train_data(self, cv_data):
    """Build NER training tuples from the employment section of a parsed CV.

    Emits one ``(text, {'entities': [(start, end, LABEL)]})`` sample per
    available field: title (DESIGNATION), organization (ORG), and the
    attendance start/end dates (START_DATE / END_DATE).
    """
    employers = cv_data.get('employment', {}).get('EmployerOrg', [])
    employments = []
    for employer in employers:
        for position in employer.get('PositionHistory', []):
            title = position.get('Title', '')
            organization = position.get('OrgName', {}).get('OrganizationName', '')
            attendance = position.get('DatesOfAttendance', [])
            date_of_attendance = attendance[0] if attendance else {}
            start_date, end_date = get_dates(date_of_attendance)
            if title:
                employments.append((title, {
                    'entities': [(0, len(title), "DESIGNATION")]
                }))
            if organization:
                employments.append((organization, {
                    'entities': [(0, len(organization), "ORG")]
                }))
            # BUG FIX: the original used `organization` as the sample text
            # for date labels while sizing the span with the date string,
            # producing misaligned (or out-of-range) entity offsets. The
            # labeled text must be the date itself, matching the
            # title/organization pattern above.
            if start_date:
                employments.append((start_date, {
                    'entities': [(0, len(start_date), "START_DATE")]
                }))
            if end_date:
                employments.append((end_date, {
                    'entities': [(0, len(end_date), "END_DATE")]
                }))
    return employments
def back_populator():
    """Replay report generation and score polling for every day of the
    season, from the fixed season opener up to today's end date."""
    day = datetime.date(2020, 12, 22)  # Start date of season
    last_day, _ = get_dates()
    while day <= last_day:
        Report().generate_reports(day)
        Scores(day).poll()
        day += datetime.timedelta(days=1)
def __init__(self):
    """Initialize report generator state and persist the date list to disk."""
    # Known state names used when fetching per-state reports.
    self.states = get_states()
    # Local file/directory layout for generated artifacts.
    self.reports_file = 'reports.json'
    self.reports_dir = 'reports/'
    self.image_dir = 'static/images/'
    # Upstream COVID data API base URL.
    self.api_endpoint = 'https://covid-api.com/api/'
    self.dates_file = 'dates.json'
    # Earliest date for which reports are generated.
    self.first_date = '2020-04-16'
    # All report dates from first_date onward.
    self.dates = get_dates(self.first_date)
    self.reports = []
    self.reports_parsed = []
    # Persist the computed dates so later runs/pages can read them.
    dump_json(self.dates_file, self.dates)
def analyze_enis(
        account_id, bucket, prefix, enis, ips, start, end, store_dir,
        ipdb=None, cmdb=None, ipranges=None, region=None, reject=None,
        targets=None, ports=None, tz=None, sample_count=20):
    """Aggregate VPC flow-log traffic for a set of ENIs and print top talkers.

    Downloads each ENI's flow logs from S3, sums inbound/outbound byte
    counters across all ENIs, resolves the IPs, and prints the
    ``sample_count`` most common sources and destinations.
    """
    logging.basicConfig(level=logging.INFO)
    logging.getLogger('botocore').setLevel(logging.WARNING)
    # BUG FIX: map() returns a one-shot iterator on Python 3; it would be
    # exhausted after the first ENI's eni_log_analyze call, silently
    # dropping the port filter for every subsequent ENI. Materialize it.
    ports = list(map(int, ports))
    start, end = get_dates(start, end, tz)
    client = boto3.client('s3')
    log_prefix = "%s/%s/flow-log/%s/%s" % (
        prefix.rstrip('/'), account_id,
        start.strftime('%Y/%m/%d'),
        "00000000-0000-0000-0000-000000000000")
    resolver = IPResolver(ipdb, cmdb, ipranges)
    agg_in_traffic = Counter()
    agg_out_traffic = Counter()
    agg_inport_traffic = Counter()
    agg_outport_traffic = Counter()
    for eni, ip in zip(enis, ips):
        files = eni_download_flows(
            client, bucket, log_prefix, start, end, eni, store_dir)
        in_traffic, out_traffic, inport_traffic, outport_traffic = eni_log_analyze(
            set(ips), eni_flow_stream(files, start, end),
            start=start, end=end, reject=reject,
            target_ips=targets, ports=ports)
        agg_in_traffic.update(in_traffic)
        agg_out_traffic.update(out_traffic)
        agg_inport_traffic.update(inport_traffic)
        agg_outport_traffic.update(outport_traffic)
    # BUG FIX: "Commmon" -> "Common", consistent with the Outbound header.
    print("Inbound %d Most Common" % sample_count)
    for ip, bcount in resolve_ip_address(
            agg_in_traffic, resolver, start, end).most_common(sample_count):
        # NOTE(review): "%s %s" % ip assumes each counter key is a 2-tuple
        # (e.g. address + resolved name) — confirm resolve_ip_address's contract.
        print("%s %s" % ip, human_size(bcount))
    print("Outbound %d Most Common" % sample_count)
    for ip, bcount in resolve_ip_address(
            agg_out_traffic, resolver, start, end).most_common(sample_count):
        print("%s %s" % ip, human_size(bcount))
def animate_better_barchart(data_dict: Dict[str, np.ndarray],
                            metadata: Dict[str, Any],
                            savedir: str):
    """Render an animated bar-chart-race mp4 from per-name time series.

    The frame index is the date range spanned by the message history; the
    output file is named after the chart title and written into savedir.
    """
    frame = pd.DataFrame.from_dict(data_dict)
    frame.index = get_dates(Message.initial_date, Message.final_date)
    animation = metadata['animation']
    bcr.bar_chart_race(
        frame,
        n_bars=metadata['max_names_per_graph'],
        steps_per_period=animation['fps'],
        title=metadata['title'],
        filename=os.path.join(savedir, f'{metadata["title"]}.mp4'),
        period_length=animation['bcr_period'],
    )
def search(question):
    """Answer a who/when question and render the results page.

    Googles the question for candidate URLs, extracts candidate names or
    dates depending on the question type, and picks the candidate with the
    highest count as the answer.
    """
    # Sends question to get_urls function in utils.py, which googles query
    # and generates list of suitable urls
    urls = utils.get_urls(question)
    # Compute the question type once instead of re-parsing per branch.
    qtype = utils.getqtype(question)
    # BUG FIX: names_dict and answer were previously unbound (NameError)
    # when the question type was neither "who" nor "when", or when no
    # candidate scored above zero. Initialize both defensively.
    names_dict = {}
    answer = None
    if qtype == "who":
        names_dict = utils.get_names(urls, question)
    elif qtype == "when":
        names_dict = utils.get_dates(urls)
    best_count = 0
    for candidate in names_dict:
        # Strict '>' keeps the first candidate among ties, as before.
        if names_dict[candidate] > best_count:
            answer = candidate
            best_count = names_dict[candidate]
    return render_template("search.html",
                           question=question,
                           urls=urls,
                           names_dict=names_dict,
                           answer=answer)
def eval(self):
    """ Returns a score from 0 to 1 of model fitness based on match with
    archaeological dates.

    Each dated site contributes the (normalized) probability of its
    simulated arrival time; sites whose simulated date is missing from the
    archaeological distribution contribute 0.
    """
    dates = get_dates()
    # Robustness: without this guard an empty date set raises
    # ZeroDivisionError below.
    if not dates:
        return 0
    total_score = 0
    for coords in dates:
        sim_date = self.grid[coords]['arrival_time']
        if sim_date and sim_date in dates[coords]:
            # Normalize probability distribution
            total_score += (dates[coords][sim_date] /
                            max(dates[coords].values()))
    return total_score / len(dates)
def update3(input6, input7, input8, input31):
    """Compare one metric (input8) between two cities and return a plotly
    figure in the chart style selected by input31.

    The six former copy-pasted branches are collapsed into a dispatch
    table: every chart type received identical arguments.
    """
    city1 = get_given_city_data(city_df, input6)
    city2 = get_given_city_data(city_df, input7)
    # NOTE(review): dfresult aliases city1, so city1's frame is mutated in
    # place below — behavior preserved from the original; confirm
    # get_given_city_data returns a fresh frame each call.
    dfresult = city1
    dfresult[input6] = city1[input8]
    dfresult[input7] = city2[input8]
    # Chart-type name -> plotly express constructor.
    chart_makers = {
        'plot': px.line,
        'bar': px.bar,
        'histogram': px.histogram,
        'area': px.area,
        'funnel': px.funnel,
        'scatter': px.scatter,
    }
    fig = None
    maker = chart_makers.get(input31)
    if maker is not None:
        fig = maker(dfresult,
                    x=get_dates(city_df),
                    y=[input6, input7],
                    title="Covid-19 : Cases comparison between %s and %s . " %
                          (input6, input7),
                    labels={
                        "x": "Time",
                        "value": "Number of Cases"
                    },
                    template="presentation")
    # An unrecognized chart type leaves fig as None and raises here,
    # matching the original behavior.
    fig.update_layout(font_family="Courier New",
                      font_color="blue",
                      title_font_family="Times New Roman",
                      title_font_color="red",
                      legend_title_font_color="green",
                      transition_duration=500)
    return fig
def update_figure(sta1, input2):
    """Plot Covid-19 time series for one state in the chart style chosen
    by the user (input2).

    The six former copy-pasted branches are collapsed into a dispatch
    table: every chart type received identical arguments.
    """
    state = get_given_state_data(state_df, sta1)
    # Chart-type name -> plotly express constructor.
    chart_makers = {
        'plot': px.line,
        'bar': px.bar,
        'area': px.area,
        'histogram': px.histogram,
        'funnel': px.funnel,
        'scatter': px.scatter,
    }
    fig = None
    maker = chart_makers.get(input2)
    if maker is not None:
        fig = maker(state,
                    x=get_dates(state_df),
                    y=['Confirmed', 'Active', 'Recovered', 'Deceased'],
                    title="Covid-19 data of state.",
                    labels={
                        "x": "Time",
                        "value": "Number of Cases"
                    },
                    template="presentation")
    # An unrecognized chart type leaves fig as None and raises here,
    # matching the original behavior.
    fig.update_layout(font_family="Courier New",
                      font_color="blue",
                      title_font_family="Times New Roman",
                      title_font_color="red",
                      legend_title_font_color="green",
                      transition_duration=500)
    return fig
def update2(input3, input4, input5, input21):
    """Compare one metric (input5) between two states and return a plotly
    figure in the chart style selected by input21.

    The six former copy-pasted branches are collapsed into a dispatch
    table: every chart type received identical arguments.
    """
    state1 = get_given_state_data(state_df, input3)
    state2 = get_given_state_data(state_df, input4)
    # NOTE(review): dfresult aliases state1, so state1's frame is mutated
    # in place below — behavior preserved from the original; confirm
    # get_given_state_data returns a fresh frame each call.
    dfresult = state1
    dfresult[input3] = state1[input5]
    dfresult[input4] = state2[input5]
    # Chart-type name -> plotly express constructor.
    chart_makers = {
        "plot": px.line,
        "bar": px.bar,
        "histogram": px.histogram,
        "area": px.area,
        "funnel": px.funnel,
        "scatter": px.scatter,
    }
    fig = None
    maker = chart_makers.get(input21)
    if maker is not None:
        fig = maker(dfresult,
                    x=get_dates(state_df),
                    y=[input3, input4],
                    title="Covid-19 : Cases comparison between %s and %s . " %
                          (input3, input4),
                    labels={
                        "x": "Time",
                        "value": "Number of Cases"
                    },
                    template="presentation")
    # An unrecognized chart type leaves fig as None and raises here,
    # matching the original behavior.
    fig.update_layout(font_family="Courier New",
                      font_color="blue",
                      title_font_family="Times New Roman",
                      title_font_color="red",
                      legend_title_font_color="green",
                      transition_duration=500)
    return fig
#!/usr/bin/python import utils from stockmarket import Stock, Market sap_tickers = utils.get_tickers() # Standard & Poor's tickers stocks = utils.get_stocks_from_tickerslist(sap_tickers) dates = utils.get_dates(stocks[0].ticker) market = Market(stocks) spread = [] market_performance = [] dates_shown = [] STEP = 30 for i in range(STEP, len(stocks[0].values)-STEP, STEP): previous = i - STEP now = i next = i + STEP strong_stocks = [s for s in stocks if s.getOffensive(previous, now) >= 100 and s.getDefensive(previous, now) <= 100] weak_stocks = [s for s in stocks if s.getOffensive(previous, now) <= 100 and s.getDefensive(previous, now) >= 100] mean_strong_performance = utils.mean([s.getPerformance(now, next) for s in strong_stocks]) mean_weak_performance = utils.mean([s.getPerformance(now, next) for s in weak_stocks]) spread.append(mean_strong_performance - mean_weak_performance) market_performance.append(market.getPerformance(previous, now)) # to look for correlations with the spread dates_shown.append(dates[now]) # dates to be shown in the X axis # format the data properly and save it in a file
def analyze_app(
        app, env, account_id, bucket, prefix, store_dir, resources,
        ipdb, ipranges, start, end, tz, sink, period, sample_count, debug):
    """Analyze flow log records for application and generate metrics per period"""
    logging.basicConfig(level=logging.INFO)
    logging.getLogger('botocore').setLevel(logging.WARNING)
    # Debug mode runs futures inline so stack traces are usable.
    executor = debug and MainThreadExecutor or ThreadPoolExecutor
    start, end = get_dates(start, end, tz)
    # NOTE(review): ipdb is passed for both the ipdb and cmdb slots of
    # IPResolver — confirm this is intentional.
    resolver = IPResolver(ipdb, ipdb, ipranges)
    for rtype_name in resources:
        rtype = Resource.get_type(rtype_name)
        # Map resource id -> resource record for this type/app/env/window.
        resource_map = {
            rtype.id(r): r for r in rtype.get_resources(
                ipdb, start, end, app, env)}
        log.info("App:%s Env:%s Type:%s Found:%d",
                 app, env, rtype_name, len(resource_map))
        with sqlite3.connect(ipdb) as db:
            db.row_factory = row_factory
            cursor = db.cursor()
            # NOTE(review): resource ids are string-interpolated into the
            # SQL IN clause; they come from the local db, but a
            # parameterized query would still be safer.
            cursor.execute(
                'select * from enis where resource_type in (%s)' % (
                    ", ".join(["'%s'" % r for r in resource_map.keys()])))
            enis = list(cursor)
        eni_map = {e['eni_id']: e for e in enis}
        # TODO: Download should be doing date bits here across the range of days.
        log_prefix = "%s/%s/flow-log/%s/%s" % (
            prefix.rstrip('/'), account_id,
            start.strftime('%Y/%m/%d'),
            "00000000-0000-0000-0000-000000000000")
        f_downloads = {}   # future -> eni record
        f_metrics = {}     # future -> eni_id
        files = {}         # eni_id -> downloaded file list
        # should probably just queue this out to distributed worker pool
        with executor(max_workers=5) as w:
            client = boto3.client('s3')
            # Phase 1: download flow logs for every ENI concurrently.
            for e in enis:
                f_downloads[
                    w.submit(
                        eni_download_flows, client, bucket, log_prefix,
                        start, end, e['eni_id'], store_dir)] = e
            for f in as_completed(f_downloads):
                if f.exception():
                    # NOTE(review): f_downloads[f] is the eni row dict, so
                    # eni_map[f_downloads[f]] would raise TypeError
                    # (unhashable) — likely meant
                    # eni_map[f_downloads[f]['eni_id']]; confirm.
                    log.warning(
                        "error processing eni %s download: %s",
                        eni_map[f_downloads[f]], f.exception())
                    continue
                e = f_downloads[f]
                files[e['eni_id']] = f.result()
            ipset = {e['ip_address'] for e in enis}
            # Phase 2: compute per-period metrics for each downloaded ENI.
            # NOTE(review): the loop variable rebinds `files` to each file
            # list while iterating files.items() — works, but fragile.
            for eni_id, files in files.items():
                stream = eni_flow_stream(files, start, end)
                f_metrics[w.submit(
                    process_eni_metrics, eni_map[eni_id], ipset, stream,
                    start, end, period, sample_count, resolver, sink)] = eni_id
            for f in as_completed(f_metrics):
                if f.exception():
                    log.warning(
                        "error processing eni %s download %s",
                        eni_map[f_metrics[f]], f.exception())
                    continue
def poll():
    """Generate reports and poll scores for yesterday's date."""
    yesterday = get_dates()[1]
    print(f"Polling for date: {yesterday}")
    Report().generate_reports(yesterday)
    Scores(yesterday).poll()
def draw_line_chart(
        data_dict: Union[List[Tuple[str, np.ndarray]], Dict[str, np.ndarray]],
        init_date: datetime,
        final_date: datetime,
        metadata: Dict[str, Any],
        fig=None,
        ax=None
) -> None:
    """Draw a multi-line time-series chart of the top per-name series.

    Keeps the top ``max_names_per_graph`` names by total volume, filters
    them by ``metadata['plot_keys']``, and optionally plots a 'TOTAL'
    curve on a secondary y-axis when ``metadata['plot_total']`` is set.
    """
    # Anex data, when present, overrides the requested date window.
    anex_data = get_anex_data(data_dict)
    if anex_data:
        (init_date, final_date) = anex_data
    # Clamp the window to available history; start/final are slice indices.
    (start, final) = fit_dates_to_history_time_length(init_date, final_date)
    plt.clf()
    if not fig or not ax:
        fig, ax = plt.subplots(figsize=(15, 8))
    ax.clear()
    locator = mdates.AutoDateLocator(minticks=3, maxticks=10)
    formater = mdates.ConciseDateFormatter(locator)
    dates = get_dates(init_date, final_date)
    max_names_per_graph = metadata.get('max_names_per_graph', 0)
    curves_to_plot = metadata.get('plot_keys', [''])
    if not isinstance(curves_to_plot, list):
        raise SystemExit('The names to plot should be list of strings')
    # Normalize dict input to a list of (name, series) pairs.
    if isinstance(data_dict, dict):
        data_dict = [(k, v) for k, v in data_dict.items()]
    filtered_data_dicts: List[Tuple[str, np.ndarray]]
    if metadata.get('plot_total'):
        curves_to_plot.append('TOTAL')
    # Rank names by total volume, keep the top N, then filter by key.
    filtered_data_dicts = filter_data_to_plot(
        sorted(data_dict,
               key=lambda kv: np.sum(kv[1]),
               reverse=True
               )[:max_names_per_graph],
        curves_to_plot,
        max_names_per_graph
    )
    if metadata.get('plot_total'):
        # Secondary axis so the TOTAL curve's scale doesn't flatten the rest.
        ax2 = ax.twinx()
        ax2.clear()
        ax2.set_ylabel('Total')
    # Offset into the date axis so date and data slice lengths line up.
    delay = len(dates[:final]) - len(filtered_data_dicts[0][1][start:final])  # type: ignore # noqa
    ax.xaxis.set_major_locator(locator)
    ax.xaxis.set_major_formatter(formater)
    for name, info in filtered_data_dicts:
        if name == 'TOTAL':
            # NOTE(review): ax2 only exists when metadata['plot_total'] is
            # truthy; a 'TOTAL' entry arriving without that flag would hit
            # a NameError here — confirm upstream guarantees this.
            ax2.xaxis.set_major_locator(locator)
            ax2.xaxis.set_major_formatter(formater)
            ax2.plot(dates[delay:final], info[start:final],
                     label=name, color='k', linewidth=1)
            continue
        ax.xaxis.set_major_locator(locator)
        ax.xaxis.set_major_formatter(formater)
        ax.plot(dates[delay:final], info[start:final], label=name)
    ax.set_title(metadata['title'])
    ax.grid(True)
    fig.legend()
def test_get_Dates():
    """get_dates should include 2020-07-07 for the first state's data."""
    first_state = get_state_data()[0]
    expected_output = '2020-07-07'
    # Membership test is equivalent to any(i == expected for i in ...).
    assert expected_output in list(get_dates(first_state))
def main():
    """ e.g. python download.py -n rthk -s 2019-09-01 -e 2019-09-30 """
    parser = argparse.ArgumentParser()
    parser.add_argument('-n', '--name', type=str, help='name of source', required=True)
    parser.add_argument('-c', '--channels', type=str, help='name of auido channels', default='all')
    parser.add_argument('-d', '--dir', type=str, help='directory for downloaded files')
    # only for rthk
    parser.add_argument('-s', '--start', type=str, help='start date')
    parser.add_argument('-e', '--end', type=str, help='end date')
    parser.add_argument('--n_speaker', type=int,
                        help='number of speakers in the youtube channel', default=2)
    arg = parser.parse_args()

    name = arg.name
    # 'all' expands to every configured channel for this source.
    channels = list(BASE_URL[name].keys()) if arg.channels == 'all' else arg.channels.split(',')
    download_dir = arg.dir if arg.dir is not None else DEFAULT_DIR_ROOT + '/' + name + '/wav'
    n_speaker = arg.n_speaker

    if name == 'rthk':
        helper = mapper[name](name, BASE_URL, download_dir)
        dates = utils.get_dates(arg.start, arg.end)
        for channel in channels:
            for date in dates:
                params = {'channel': channel, 'date': date}
                finished = False
                # Retry forever on connection resets (with a pause) or on
                # other errors (logged, then retried).
                while not finished:
                    try:
                        tic = time.perf_counter()
                        helper.download(params)
                        print('Time needed for this sample: {}'.format(
                            time.perf_counter() - tic))
                        finished = True  # turn off the loop
                    except ConnectionResetError:
                        print('Sleep for 5 secs to avoid from blocking...')
                        time.sleep(5)
                    except Exception as exe:
                        print('download.py')
                        traceback.print_tb(exe.__traceback__)
    elif name == 'youtube':
        helper = mapper[name](name, BASE_URL, download_dir,
                              api_key=GOOGLE_API_KEY,
                              n_per_playlist=N_PER_PLAYLIST)
        playlist_ids = MULTI_SPEAKER_PLAYLIST if n_speaker > 1 else SINGLE_SPEAKER_PLAYLIST
        for playlist_id in playlist_ids:
            helper.download(playlist_id)
int(game['awayTeam']['teamId']), game['gameStatusText'], int(game['homeTeam']['score']), int(game['awayTeam']['score']), int(game['homeTeam']['wins']), int(game['awayTeam']['wins']), int(game['homeTeam']['losses']), int(game['awayTeam']['losses']), f"{game['awayTeam']['teamName']} at {game['homeTeam']['teamName']}" ] data.append(obj) return data def upload_data(self, parsed_data): rows = self.upload_to_db(parsed_data) return parsed_data def poll(self): raw_data = self.get_data() parsed_data = self.parse_data(raw_data) data = self.upload_data(parsed_data) if __name__ == '__main__': # print(Scores('04/2/2021').poll()) start_date = datetime.date(2020, 12, 22) # start_date = datetime.date(2021, 4, 2) end_date, _ = get_dates() while start_date <= end_date: Scores(start_date).poll() start_date = start_date + datetime.timedelta(days=1)
args = parser.parse_args()
maybe_make_dir('weights')
maybe_make_dir('portfolio_val')

# Timestamp used to tag this run's saved artifacts.
timestamp = time.strftime('%Y%m%d%H%M')

stocks = Stocks()
portfolio = Portfolio()
data = np.around(get_data(ticker=args.ticker))
# Split price history into train/test along the time axis (columns),
# holding out the last `testingDays` columns for testing.
dataSplit = data.shape[1] - args.testingDays
train_data = data[:, :dataSplit]
test_data = data[:, dataSplit:]

dates = (get_dates(ticker=args.ticker))
# NOTE(review): `dataesSplit` (typo) is computed but never used — the
# date split below reuses `dataSplit`, which is only correct if `dates`
# and `data` have the same number of columns; confirm upstream.
dataesSplit = dates.shape[1] - args.testingDays
train_dates = dates[:, :dataSplit]
test_dates = dates[:, dataSplit:]

env = TradingEnv(stocks, train_data, train_dates, args.initial_invest)
state_size = env.observation_space.shape
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size)
scaler = get_scaler(env)

portfolio_value = []

if args.mode == 'test':
    # remake the env with test data
    env = TradingEnv(stocks, test_data, test_dates, args.initial_invest)