def download_caricatures_form_links(links):
    table_of_content = []
    for i, cp in links:
        r = 0
        try:
            car_page_to_open = urllib2.urlopen("http://www.haaretz.co.il"+cp, timeout=15)
            caricature_page = car_page_to_open.read()
        except (urllib2.URLError, httplib.IncompleteRead, socket.timeout), e:
            r = r+1
            print "Re-trying, attempt -- ", r
            time.sleep(2)
            pass
        car_page_to_open.close()
        picture_links = ["http://www.haaretz.co.il"+re.findall('.*?source srcset="(.*?)"', caricature_page)[0]]
        pic_date = re.findall('.*?itemprop="datePublished">(.*?) ', caricature_page)
        if len(pic_date) > 0:
            pd_data = pic_date[0].split('.')
            pd = '.'.join([pd_data[1], pd_data[0], pd_data[2]])
        else:
            pd = 'non_date'
        pic_print_date = '_'.join(pd.split('.'))
        for pl in picture_links:
            print '.',
            head, tail = os.path.splitext(pl)
            dest_file = os.path.join(folder, 'images', pic_print_date+tail)
            count = 0
            while os.path.exists(dest_file):
                count += 1
                dest_file = os.path.join(folder, 'images', '%s-%d%s' % (pic_print_date, count, tail))
            # download the file
            # try:
            #     urllib.urlretrieve(pl, dest_file)
            # except (urllib2.URLError, socket.timeout), e:
            #     r = r+1
            #     print "Re-trying Retrieve, attempt -- ", r
            #     time.sleep(2)
            #     pass
            ff, fname = os.path.split(dest_file)
            table_of_content.append({'End Date': '/'.join(pd.split('.')),
                                     'Headline': '<h1 style="color:black;text-align:right;font-size:35px">please add headline</h1>',
                                     'Media': pl,
                                     'Media Caption': '',
                                     'Media Credit': 'caricatures from Haaretz newspaper',
                                     'Media Thumbnail': '',
                                     'Start Date': '/'.join(pd.split('.')),
                                     'Tag': '',
                                     'Text': '<p style="text-align:right;font-size:20px">add text</p>',
                                     'Type': ''})

def call_estimate_wait_time(n_clicks, backlog_dict, eb_type, mf_msg, pd, future_supply, future_so):
    if pd:
        pd = pd.split(' ')[0]
    pred_results = estimate_wait_time(eb_type, pd, future_supply, future_so, backlog_dict)
    if n_clicks:
        user_ip = get_ip()
        new_prediction_record = {
            'timestamp': datetime.datetime.now().__str__(),
            'ip': user_ip,
            'pd': pd,
            'eb_type': eb_type,
            'multiFactor': mf_msg,
            'future_supply': future_supply,
            'future_so': future_so,
            'results': pred_results
        }
        res = requests.post(f'https://{db_url}/prediction_record.json', data=json.dumps(new_prediction_record))
        res = requests.get(f'https://{db_url}/prediction_count.json')
        count_record = res.json()
        count_record['count'] += 1
        res = requests.put(f'https://{db_url}/prediction_count.json', data=json.dumps(count_record))
        return pred_results
    else:
        return ''

def buildDices(self, cd, pd):
    c = cd.split(",")
    p = pd.split(",")
    l = len(c)
    if l == 6:
        for f in range(l):
            self.cdice[f] = PPP.setDiceFace(c[f])
            self.pdice[f] = PPP.setDiceFace(p[f])
    else:
        print("-- Configuration data is strange...")

def pd_to_FY(pd):
    yr, mm, _ = pd.split('-')
    yr = int(yr)
    mm = int(mm)
    fy = yr + 1 if mm >= 10 else yr
    pd = datetime.datetime.strptime(pd, '%Y-%m-%d')
    fy_begin = datetime.datetime(fy - 1, 10, 1)
    days_after_fy_start = (pd - fy_begin).days
    return fy, days_after_fy_start

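# A minimal usage sketch for pd_to_FY above, assuming the US federal fiscal year
# (starting October 1, which matches the fy_begin computation): a priority date of
# 2021-01-15 falls in FY2021, 106 days after the FY2021 start on 2020-10-01.
#
#     >>> pd_to_FY('2021-01-15')
#     (2021, 106)
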
def time_formation(pd):
    # new = pd.split(" ")
    # k = new[1]
    # p_period = get_period(k)
    new = pd.split(" ")
    p_Date = new[0]
    p_time = new[1]
    new = p_time.split(":")
    p_hour = new[0]
    p_minute = new[1]
    # snap the minutes to a half-hour boundary: 29-59 -> '30', 00-28 -> '00'
    if p_minute >= '29':
        p_minute = '30'
    else:
        p_minute = '00'
    p_date_time = p_Date + " " + p_hour + ":" + p_minute + ":00"
    return p_date_time, p_Date, p_time

def main():
    # get arguments
    config, args = get_asr_args()

    # Train / Test
    if not args.eval:
        random.seed(args.seed)
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(args.seed)
        if not args.rnnlm:
            if not args.test:
                # Train ASR
                from asr.solver import Trainer as Solver
            else:
                # Test ASR
                from asr.solver import Tester as Solver
        else:
            # Train RNNLM
            from asr.solver import RNNLM_Trainer as Solver
        solver = Solver(config, args)
        solver.load_data()
        solver.set_model()
        solver.exec()
    # Eval
    else:
        decode = pd.read_csv(args.file, sep='\t', header=None)
        truth = decode[0].tolist()
        pred = decode[1].tolist()
        cer = []
        wer = []
        for gt, pd in zip(truth, pred):
            wer.append(ed.eval(pd.split(' '), gt.split(' ')) / len(gt.split(' ')))
            cer.append(ed.eval(pd, gt) / len(gt))
        print('CER : {:.6f}'.format(sum(cer) / len(cer)))
        print('WER : {:.6f}'.format(sum(wer) / len(wer)))
        print('p.s. for phoneme sequences, WER=Phone Error Rate and CER is meaningless.')

import editdistance as ed
import pandas as pd
import argparse

# Arguments
parser = argparse.ArgumentParser(description='Evaluate decoding result.')
parser.add_argument('--file', type=str, help='Path to decode result file.')
paras = parser.parse_args()

print(paras.file)
decode = pd.read_csv(paras.file, header=None, sep="\t")
truth = decode[0].tolist()
pred = decode[1].tolist()
cer = []
wer = []
for gt, pd in zip(truth, pred):
    wer.append(ed.eval(pd.split(' '), gt.split(' ')) / len(gt.split(' ')))
    cer.append(ed.eval(pd, gt) / len(gt))
print('CER : {:.6f}'.format(sum(cer) / len(cer)))
print('WER : {:.6f}'.format(sum(wer) / len(wer)))
print('p.s. for phoneme sequences, WER=Phone Error Rate and CER is meaningless.')

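# A hedged sketch of the decode-result file this script expects, inferred from the
# read_csv call above: tab-separated, no header, ground truth in column 0 and the
# decoded hypothesis in column 1, one utterance per line, e.g.
#
#     the cat sat on the mat<TAB>the cat sat on a mat
#
# For that line the WER would be 1/6 (one substituted word out of six reference words),
# and the CER is the character-level edit distance divided by the reference length.
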
# ad_json = json.loads(ad)
# answers = ad_json['answers'][0]
# t = answers['answerContent']['crowd-semantic-segmentation']
# imgData = t['labeledImage']['pngImageData']
# img = base64.b64decode(imgData)
# with open('C:/tmp_mask.png', 'wb') as f:
#     f.write(img)

# change 5: check whether the mask image is empty, rather than using img != 0
if np.sum(io.imread('C:/tmp_mask.png')) != 0:
    pkey = f"""sagetest/output/wakeforest-Subject02-1040Burn02ID395658/annotations/worker-response/iteration-1/0/2020-10-29_20:56:13.json"""
    pseudo = s3Client.get_object(Bucket=bucket, Key=pkey)
    pseudo_stream = pseudo['Body']
    pseudo_data = pseudo_stream.read()
    pd = pseudo_data.decode('utf-8')
    lines = pd.split('\n')
    lines.remove('')
    pseudo_json = {}
    for line in lines:
        row = line.split('\t')
        pseudo_json.update({row[0]: row[1]})
    uri = pseudo_json[str(iteration[0])]
    pseudo_name = uri.split('/')[-1]
    pname = pseudo_name.split('.')[0]
    pint = [int(s) for s in pname.split('_') if s.isdigit()]
    study = pint[1]
    # metadata = smd_images_df[smd_images_df['Study'] == int(study)]
    # meta_pseudo = metadata[metadata['S3_Location'] == 'PseudoColor/' + pseudo_name]
    # meta = meta_pseudo.iloc[[0]]
    # meta['ImageID'] = str(rd.randint(1, 5000000))
    # meta['ImageType'] = 'Truth'

def estimate_wait_time(eb_type, pd, future_supply, future_so, backlog_dict):
    def get_backlog_before(eb_type, pd, backlog_dict):
        pd = datetime.datetime.strptime(pd, '%Y-%m-%d').timestamp()
        all_pd = [
            datetime.datetime.strptime(p, '%m/%d/%Y').timestamp()
            for p in backlog_dict['date']
        ]
        all_back = backlog_dict[eb_type]
        b = np.interp(pd, all_pd, all_back)
        return int(b)

    if 'date' not in backlog_dict or future_supply <= 0 or not pd:
        return ''
    bl = get_backlog_before(eb_type, pd, backlog_dict)
    df_visa = load_gc_stats()
    clear_record = get_historical_clear(bl, pd, df_visa, eb_type)
    msg_list = []
    try:
        total_month = 0
        msg_list.append(
            f'There are a total of {bl} {eb_type} green card demands in front of your PD {pd} at the date you filed your case.'
        )
        for i, cr in enumerate(clear_record):
            fy, cl, rm, new_month = cr['fy'], cr['clear'], cr['remaining'], cr['lapsed_month']
            total_month += new_month
            if i == 0:
                msg_list.append(
                    f'From {pd} to the end of FY{fy}, {cl} {eb_type} green card demands were cleared, {rm} were still remaining.'
                )
            else:
                msg_list.append(
                    f'During FY{fy}, {cl} {eb_type} green card demands were cleared, {rm} were still remaining.'
                )
        if len(clear_record) == 0:
            remaining_bl = bl
            additional_wait_starting_date = pd
        else:
            remaining_bl = clear_record[-1]['remaining']
            additional_wait_starting_date = '2019-10-1'
        wait_time = remaining_bl / (future_supply + future_so)
        wy = int(wait_time)
        wm = int(np.round((wait_time - wy) * 12.0))
        msg_list.append(
            f'Based on a future annual supply of {future_supply} and annual spillover of {future_so} for {eb_type}, the remaining backlog would need an additional {wy} years {wm} months starting from {additional_wait_starting_date}.'
        )
        additional_wait_starting_yr = int(additional_wait_starting_date.split('-')[0])
        additional_wait_starting_mm = int(additional_wait_starting_date.split('-')[1])
        additional_month = wy * 12 + wm
        green_month = additional_wait_starting_mm + additional_month
        green_yr = additional_wait_starting_yr + int((green_month - 1) / 12.0)
        green_month = (green_month - 1) % 12 + 1
        msg_list.append(
            f'Your final action date is likely to become current at {green_yr}-{str(green_month).zfill(2)}.'
        )
        pd_yr = int(pd.split('-')[0])
        pd_mm = int(pd.split('-')[1])
        total_month = (green_yr - pd_yr) * 12 + (green_month - pd_mm)
        total_yr = int(total_month / 12)
        total_month -= total_yr * 12
        msg_list.append(
            f'Your total wait time is {total_yr} years and {total_month} months after {pd}.'
        )
        msg = '\n\n'.join(msg_list)
    except Exception:
        msg = 'Incorrect Input'
    return msg

def call_estimate_wait_time(eb_type, pd, future_supply, future_so, backlog_dict):
    pd = pd.split(' ')[0]
    return estimate_wait_time(eb_type, pd, future_supply, future_so, backlog_dict)

def parse_platefile_synergy_neo(self, sep='\t'):
    """
    Extracts data from a platefile.

    Data includes number of plates, assay types, plate names, and number of
    well rows and cols.
    """
    if sep != '\t':
        raise PlateFileUnknownFormat('Synergy Neo can only be parsed as '
                                     'tab-separated')
    self.file_format = 'Synergy Neo'
    pd = self.plate_file.read()
    try:
        pd = self._str_universal_newlines(pd.decode('utf-8'))
    except UnicodeDecodeError:
        raise PlateFileUnknownFormat('Error opening file with UTF-8 '
                                     'encoding (does file contain '
                                     'non-standard characters?)')
    plates = pd.split('Field Group\n\nBarcode:')
    if len(plates) == 1:
        plates = pd.split('Barcode\n\nBarcode:')
        if len(plates) == 1:
            raise PlateFileUnknownFormat('File does not appear to be in '
                                         'Synergy Neo format')
    self._create_db_platefile()

    well_measurements = []
    for p in plates:
        if len(p.strip()) == 0:
            continue
        barcode_and_rest = p.split('\n', 1)
        barcode = barcode_and_rest[0].strip()
        plate_and_timepoint = self.extract_plate_and_timepoint(barcode)
        if plate_and_timepoint is None:
            raise PlateFileParseException('Unable to parse timepoint for '
                                          'barcode {} or from plate file '
                                          'name'.format(barcode))
        plate_name = plate_and_timepoint['plate']
        plate_timepoint = plate_and_timepoint['timepoint']
        plate = self._plate_objects.get(plate_name, None)

        # Each plate can have multiple assays
        assays = re.split(r'\n\s*\n', barcode_and_rest[1])
        for a in assays:
            a_strp = a.strip()
            if len(a_strp) == 0:
                continue
            well_lines = a.split('\n')
            assay_name = well_lines[0].strip()
            well_cols = len(well_lines[1].split())
            # Minus 2: One for assay name, one for column headers
            well_rows = len(well_lines) - 2
            if plate is None:
                plate = self._get_or_create_plate(plate_name, well_cols, well_rows)

            # Check plate dimensions are as expected
            if well_cols != plate.width:
                raise PlateFileParseException(
                    'Unexpected plate width on plate with barcode {} '
                    '(expected: {}, got: {})'.format(barcode, plate.width, well_cols))
            if well_rows != plate.height:
                raise PlateFileParseException(
                    'Unexpected plate height on plate with barcode {} '
                    '(expected: {}, got: {})'.format(barcode, plate.height, well_rows))

            well_id = 0
            for row in range(2, len(well_lines)):
                for val in well_lines[row].split('\t')[1:-1]:
                    well_measurements.append(
                        WellMeasurement(
                            well_id=self._well_sets[plate.id][well_id],
                            timepoint=plate_timepoint,
                            assay=assay_name,
                            value=float(val)))
                    well_id += 1

    if not well_measurements:
        raise PlateFileParseException('File contains no readable plates')

    try:
        WellMeasurement.objects.bulk_create(well_measurements)
    except IntegrityError:
        raise PlateFileParseException('A file with the same plate, '
                                      'assay and time points has been '
                                      'uploaded to this dataset before')

    # Update modified_date
    self.dataset.save()