def replace_current_project(project: Project, path: str = ARCHIVE_LOCATION) -> None:
    file_lines = read_file_lines(path)
    # Keep everything after the first line (the previous current project).
    block = "\n".join(file_lines[1:])
    serialised = json_serialise(project)
    write_file(path, serialised + '\n' + block)
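# read_file_lines, write_file and json_serialise are project helpers that are not
# shown in these snippets; below is a minimal sketch of what they are assumed to
# do, for illustration only (the real signatures may differ).
import json
from dataclasses import asdict


def read_file_lines(path: str) -> list:
    with open(path, encoding='utf-8') as f:
        return f.read().splitlines()


def write_file(path: str, content: str) -> None:
    with open(path, 'w', encoding='utf-8') as f:
        f.write(content)


def json_serialise(project) -> str:
    # Assumes Project is a dataclass; swap in project.to_dict() if it has one.
    return json.dumps(asdict(project))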
def dump_report(self, output_folder, prefix=''):
    report = '''KB ID: {kb_id}
Total Query: {cnt_query}
Success: {cnt_success}
Failed: {cnt_failed}
not_find_ep: {cnt_no_ep}
find_ep_but_not_find_edge: {cnt_no_edge}
find_ep_and_edge_but_lack_of_justification: {cnt_no_justi}
=NO EDGE LIST=
{no_edge_list}
=NO JUSTI LIST=
{no_justi_list}
'''.format(kb_id=self.kb_id,
           cnt_query=str(self.total_query),
           cnt_success=str(len(self.success)),
           cnt_failed=str(self.total_query - len(self.success)),
           cnt_no_ep=str(len(self.failed[Failure.NO_EP])),
           cnt_no_edge=str(len(self.failed[Failure.NO_EDGE])),
           cnt_no_justi=str(len(self.failed[Failure.NO_JUSTI])),
           no_edge_list='\n'.join(self.failed[Failure.NO_EDGE]),
           no_justi_list='\n'.join(self.failed[Failure.NO_JUSTI]))
    write_file(
        report,
        '%s/%s%s_gr_stat.txt' % (output_folder.rstrip('/'), str(prefix), self.kb_id))
def asset_overlay():
    a = AssetOverlay()
    a.calculate_asset_score()
    tracts = a.get_tract_data_geo()
    districts = a.get_district_data_geo()
    asset_data = a.get_asset_data()

    # merge tracts with asset data
    asset_tracts_geo = pd.merge(tracts, asset_data, how='left')
    write_file(asset_tracts_geo, 'asset_tracts')

    # aggregate to district
    district_asset = asset_tracts_geo \
        .drop(['geometry', 'GEOFIPS', 'population', 'asset'], axis=1) \
        .groupby('DISTRICT') \
        .mean() \
        .reset_index()

    # merge with district gdf
    districts_with_asset = pd.merge(districts, district_asset)
    districts_with_asset = districts_with_asset.drop(['OBJECTID', 'ID'], axis=1)

    # recalculate the overall asset score
    asset_columns = districts_with_asset[[
        'behavioural', 'food', 'schools', 'benefits', 'parks'
    ]]
    districts_with_asset['asset'] = asset_columns.mean(axis=1)
    write_file(districts_with_asset, 'asset_districts')
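# The write_file used by the overlay snippets is a different helper from the one
# sketched above: it takes a (Geo)DataFrame and a base name. A sketch under the
# assumption that it writes both GeoJSON and CSV, as the comments in these
# functions suggest; the output directory name is invented.
import os

import geopandas as gpd


def write_file(frame, name, out_dir='output'):
    os.makedirs(out_dir, exist_ok=True)
    gdf = gpd.GeoDataFrame(frame)
    gdf.to_file('%s/%s.geojson' % (out_dir, name), driver='GeoJSON')
    gdf.drop(columns='geometry').to_csv('%s/%s.csv' % (out_dir, name), index=False)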
def run(xml_query):
    from src.utils import write_file

    with open(xml_query) as f:
        xml = f.read()
    ans = Answer(Question(xml)).ask()
    print(ans)
    # Write the answer next to the query file, e.g. foo.xml -> foo_response.xml
    write_file(ans, xml_query.split('.')[0] + '_response.xml')
def in_(dest_path, in_stream):
    input = json.load(in_stream)
    msg("Input: {}", input)
    version = input["version"]["version"]
    msg("Version Returned: {}", version)
    # Write out files
    write_file(os.path.join(dest_path, "version"), version)
    return {"version": {"version": version}}
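# A hedged local test for the version-only in_ above: it feeds the resource the
# same JSON shape it reads from stdin and prints the returned version. It assumes
# msg() and write_file() from the same resource module are importable; the
# payload values are invented.
import io
import json
import tempfile

if __name__ == "__main__":
    payload = {"source": {}, "version": {"version": "1.2.3"}}
    with tempfile.TemporaryDirectory() as dest:
        result = in_(dest, io.StringIO(json.dumps(payload)))
        print(json.dumps(result))  # the version dict goes back to the caller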
def dump_report(self, output_folder, prefix=''):
    report = '''KB ID: {kb_id}
Total Query: {cnt_query}
Success: {cnt_success}
Failed: {cnt_failed}
=Failed LIST=
{failed_list}
'''.format(kb_id=self.kb_id,
           cnt_query=str(self.total_query),
           cnt_success=str(len(self.success)),
           cnt_failed=str(self.total_query - len(self.success)),
           failed_list='\n'.join(self.failed[Failure.ZEROHOP]))
    write_file(
        report,
        '%s/%s%s_zh_stat.txt' % (output_folder.rstrip('/'), str(prefix), self.kb_id))
def calculate_cumulative_scores(self):
    risk = self.risk_tracts[['GEOFIPS', 'DISTRICT', 'risk', 'geometry']]
    assets = self.asset_tracts[['GEOFIPS', 'DISTRICT', 'asset']]
    cumulative = pd.merge(risk, assets, on=['GEOFIPS', 'DISTRICT'])
    cumulative['cumulative'] = cumulative['asset'] - cumulative['risk']
    write_file(cumulative, 'cumulative_tracts')

    # Calculate cumulative score by using the district risk and asset scores
    district_asset = self.asset_districts[['DISTRICT', 'asset', 'geometry']]
    district_risk = self.risk_districts[['DISTRICT', 'risk']]
    cumulative_districts = pd.merge(district_asset, district_risk, on=['DISTRICT'])
    cumulative_districts['cumulative'] = cumulative_districts['asset'] - cumulative_districts['risk']
    write_file(gpd.GeoDataFrame(cumulative_districts), 'cumulative_districts')
def in_(dest_path, in_stream):
    input = json.load(in_stream)
    msg("Input: {}", input)
    api_root = input["source"]["api_root"]
    event_id = input["version"]["id"]
    event = api.get_event(api_root, event_id)
    msg("Event Returned: {}", event)
    collection_id = event["collection_id"]
    collection_version = event["version"] or "latest"
    content_server = event["content_server"]["hostname"]
    # Write out files
    write_file(os.path.join(dest_path, "id"), event_id)
    write_file(os.path.join(dest_path, "collection_id"), collection_id)
    write_file(os.path.join(dest_path, "version"), collection_version)
    write_file(os.path.join(dest_path, "content_server"), content_server)
    write_file(os.path.join(dest_path, "event.json"), event)
    return {"version": {"id": event_id}}
def risk_overlay():
    r = RiskOverlay()
    r.calculate_risk_score()
    tracts = r.get_tract_data_geo()
    districts = r.get_district_data_geo()
    risk_data = r.get_risk_data()

    # merge tracts with risk
    risk_tract_geo = pd.merge(tracts, risk_data, how='left')

    # write tract geojson/csv
    write_file(risk_tract_geo, 'risk_tracts')

    # aggregate to district
    district_risk = risk_tract_geo \
        .drop(['geometry', 'GEOFIPS', 'population', 'aces', 'acebalwt'], axis=1) \
        .groupby('DISTRICT') \
        .mean() \
        .reset_index()

    # merge with district gdf
    districts_with_risk = pd.merge(districts, district_risk)
    districts_with_risk = districts_with_risk.drop(['OBJECTID', 'ID'], axis=1)

    # aces district scores must be calculated differently
    d = r.get_aces_districts()
    d['aces'] = array_to_percentiles(d['aces'])
    districts_with_risk = pd.merge(districts_with_risk, d)

    # recalculate the overall risk score
    risk_columns = districts_with_risk[[
        'crime', 'education', 'poverty', 'unemployment', 'aces'
    ]]
    districts_with_risk['risk'] = risk_columns.mean(axis=1)

    # write districts geojson/csv
    write_file(districts_with_risk, 'risk_districts')
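# array_to_percentiles is not defined in these snippets; a minimal sketch under
# the assumption that it maps each value to its percentile rank in [0, 1].
import pandas as pd


def array_to_percentiles(values):
    return pd.Series(values).rank(pct=True)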
def fun(df_item):
    ix, loc = df_item
    # Throttle requests a little to stay polite with the geocoding service.
    sleep(np.random.randint(1, 3))
    try:
        location = geolocator.geocode(loc)
        if location is not None:
            lat, long = (location.latitude, location.longitude)
            print(str(ix) + ';' + str(loc) + ';' + str(lat) + ';' + str(long))
            utils.write_file(str(ix) + ';' + str(loc) + ';' + str(lat) + ';' + str(long) + '\n',
                             out_filename='coordinates.txt')
        else:
            # The geocoder returned no match for this query string.
            utils.write_file(str(ix) + ';' + str(loc) + '\n',
                             out_filename='coordinates-wrong-query.txt')
    except (GeocoderUnavailable, GeocoderQuotaExceeded, GeocoderTimedOut) as e:
        print(e)
        utils.write_file(str(ix) + ';' + str(loc) + '\n',
                         out_filename='coordinates-failed.txt')
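# A hedged driver for fun(): the Series content and user_agent are invented, and
# geolocator, utils, sleep and np must already exist at module level, as fun()
# relies on them. Geocoding also needs network access.
import pandas as pd
from geopy.geocoders import Nominatim

geolocator = Nominatim(user_agent="coordinate-lookup-example")
locations = pd.Series(['Berlin, Germany', 'Lisbon, Portugal'])
for item in locations.items():  # yields (index, location) pairs, the shape fun() expects
    fun(item)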
import sys
sys.path.append('../')

from src.utils import write_file
from src.graph_query import GraphQuery
from src.zerohop_query import ZerohopQuery

# _, query_file, n2p_txt, output_prefix = sys.argv
# gq = GraphQuery(query_file, n2p_txt)
# write_file(gq.related_d2q, output_prefix + '_d2q.json')
# write_file(gq.related_q2d, output_prefix + '_q2d.json')
# write_file(gq.related_img_video, output_prefix + '_d2img_video_doc.json')

zq = ZerohopQuery('/Users/dongyuli/isi/eval_queries/data/zerohop_queries.xml')
print(zq.related_doc)
write_file(zq.related_img_video, './zh_d2img_video_doc.json')
def replace_archive(projects: List[Project]) -> None:
    list_strings = [json_serialise(p) for p in projects]
    block = '\n'.join(list_strings)
    clean_archive(ARCHIVE_LOCATION)
    write_file(ARCHIVE_LOCATION, block)
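# clean_archive is not shown in these snippets; a minimal sketch under the
# assumption that "cleaning" simply truncates the archive file before the new
# block is written.
def clean_archive(path: str = ARCHIVE_LOCATION) -> None:
    open(path, 'w').close()  # truncate in place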
def in_(dest_path, in_stream):
    input = json.load(in_stream)
    msg("Input: {}", input)
    api_root = input["source"]["api_root"]
    job_id = input["version"]["id"]
    job = api.get_job(api_root, job_id)
    msg("job Returned: {}", job)
    collection_id = job["collection_id"]
    collection_version = job["version"] or "latest"
    collection_style = job["style"]
    content_server = (job["content_server"] or {"hostname": None})["hostname"]
    # Write out files
    write_file(os.path.join(dest_path, "id"), job_id)
    write_file(os.path.join(dest_path, "collection_id"), collection_id)
    write_file(os.path.join(dest_path, "version"), collection_version)
    write_file(os.path.join(dest_path, "collection_style"), collection_style)
    write_file(os.path.join(dest_path, "content_server"), content_server)
    write_file(os.path.join(dest_path, "job.json"), job)
    return {"version": {"id": job_id}}