Code Example #1
import httplib
import os
import re
import socket
import time
import urllib
import urllib2

def download_caricatures_from_links(links):
    table_of_content = []
    for i, cp in links:  # links yields (index, relative_url) pairs

        # Fetch the caricature page, retrying on transient network errors
        caricature_page = None
        for attempt in range(1, 4):
            try:
                car_page_to_open = urllib2.urlopen("http://www.haaretz.co.il" + cp, timeout=15)
                caricature_page = car_page_to_open.read()
                car_page_to_open.close()
                break
            except (urllib2.URLError, httplib.IncompleteRead, socket.timeout), e:
                print "Re-trying, attempt -- ", attempt
                time.sleep(2)
        if caricature_page is None:
            continue

        picture_links = ["http://www.haaretz.co.il" + re.findall('.*?source srcset="(.*?)"', caricature_page)[0]]
        pic_date = re.findall('.*?itemprop="datePublished">(.*?) ', caricature_page)
        if len(pic_date) > 0:
            # Re-order the scraped dd.mm.yyyy date into mm.dd.yyyy
            pd_data = pic_date[0].split('.')
            pd = '.'.join([pd_data[1], pd_data[0], pd_data[2]])
        else:
            pd = 'non_date'
        pic_print_date = '_'.join(pd.split('.'))

        for pl in picture_links:
            print '.',
            head, tail = os.path.splitext(pl)
            # `folder` is assumed to be a module-level output directory
            dest_file = os.path.join(folder, 'images', pic_print_date + tail)

            # Avoid overwriting an existing file by appending a counter
            count = 0
            while os.path.exists(dest_file):
                count += 1
                dest_file = os.path.join(folder, 'images', '%s-%d%s' % (pic_print_date, count, tail))

            # Download the image, retrying on transient network errors
            for attempt in range(1, 4):
                try:
                    urllib.urlretrieve(pl, dest_file)
                    break
                except (IOError, socket.timeout), e:
                    print "Re-trying retrieve, attempt -- ", attempt
                    time.sleep(2)

            ff, fname = os.path.split(dest_file)
            table_of_content.append({'End Date': '/'.join(pd.split('.')),
             'Headline': '<h1 style="color:black;text-align:right;font-size:35px">please add headline</h1>',
             'Media': pl,
             'Media Caption': '',
             'Media Credit': 'caricatures from Haaretz newspaper',
             'Media Thumbnail': '',
             'Start Date': '/'.join(pd.split('.')),
             'Tag': '',
             'Text': '<p style="text-align:right;font-size:20px">add text</p>',
             'Type': ''})

    return table_of_content
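A minimal driver might look like the following; the link-list shape and the `folder` value are assumptions, since the snippet does not show its caller:

# Hypothetical driver; the original caller is not shown.
folder = '/tmp/haaretz'                          # assumed output root
links = [(0, '/gallery/caricatures/some-path')]  # assumed (index, path) pairs
toc = download_caricatures_from_links(links)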
Code Example #2
    def call_estimate_wait_time(n_clicks, backlog_dict, eb_type, mf_msg, pd, future_supply, future_so):
        if pd:
            # Keep only the date part of the picked datetime string
            pd = pd.split(' ')[0]
            pred_results = estimate_wait_time(eb_type, pd, future_supply, future_so, backlog_dict)
            if n_clicks:
                # Log the prediction request
                user_ip = get_ip()
                new_prediction_record = {
                    'timestamp': str(datetime.datetime.now()),
                    'ip': user_ip,
                    'pd': pd,
                    'eb_type': eb_type,
                    'multiFactor': mf_msg,
                    'future_supply': future_supply,
                    'future_so': future_so,
                    'results': pred_results
                }
                requests.post(f'https://{db_url}/prediction_record.json', data=json.dumps(new_prediction_record))

                # Bump the global prediction counter
                res = requests.get(f'https://{db_url}/prediction_count.json')
                count_record = res.json()
                count_record['count'] += 1
                requests.put(f'https://{db_url}/prediction_count.json', data=json.dumps(count_record))
            return pred_results
        else:
            return ''
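The signature reads like a Dash callback body; a hedged sketch of how it might be registered, with every component id invented for illustration:

from dash.dependencies import Input, Output, State

# Hypothetical wiring; all component ids below are invented, and `app`
# is assumed to be an existing dash.Dash instance.
app.callback(
    Output('prediction-output', 'children'),
    [Input('submit-button', 'n_clicks')],
    [State('backlog-store', 'data'),
     State('eb-type-dropdown', 'value'),
     State('multi-factor-msg', 'children'),
     State('priority-date-picker', 'date'),
     State('future-supply-input', 'value'),
     State('future-spillover-input', 'value')],
)(call_estimate_wait_time)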
Code Example #3
 def buildDices(self, cd, pd):
     # cd and pd are comma-separated face lists for the computer's
     # and the player's dice, e.g. "1,2,3,4,5,6"
     c = cd.split(",")
     p = pd.split(",")
     n = len(c)
     if n == 6 and len(p) == 6:
         for f in range(n):
             self.cdice[f] = PPP.setDiceFace(c[f])
             self.pdice[f] = PPP.setDiceFace(p[f])
     else:
         print("-- Configuration data is strange...")
Code Example #4
File: analysis_backlog.py Project: 0xzz/eb_dashboard
import datetime

def pd_to_FY(pd):
    # US federal fiscal years start on October 1, so October and later
    # months already belong to the next fiscal year.
    yr, mm, _ = pd.split('-')
    yr = int(yr)
    mm = int(mm)
    fy = yr + 1 if mm >= 10 else yr

    # Days elapsed since the start of that fiscal year
    pd = datetime.datetime.strptime(pd, '%Y-%m-%d')
    fy_begin = datetime.datetime(fy - 1, 10, 1)
    days_after_fy_start = (pd - fy_begin).days

    return fy, days_after_fy_start
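For example, a priority date in January 2021 falls in FY2021, which began on 2020-10-01:

print(pd_to_FY('2021-01-15'))  # (2021, 106): 106 days after 2020-10-01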
Code Example #5
def time_formation(pd):
    # Split "YYYY-MM-DD HH:MM" into its date and time parts
    new = pd.split(" ")
    p_Date = new[0]
    p_time = new[1]
    new = p_time.split(":")
    p_hour = new[0]
    p_minute = new[1]
    # Snap the minutes down to a half-hour boundary
    if int(p_minute) >= 29:
        p_minute = '30'
    else:
        p_minute = '00'
    p_date_time = p_Date + " " + p_hour + ":" + p_minute + ":00"
    return p_date_time, p_Date, p_time
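A quick check of the rounding behaviour:

print(time_formation('2021-01-15 13:42'))
# ('2021-01-15 13:30:00', '2021-01-15', '13:42')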
Code Example #6
def main():
    
    # get arguments
    config, args = get_asr_args()
    
    # Train / Test
    if not args.eval:
        random.seed(args.seed)
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        if torch.cuda.is_available(): torch.cuda.manual_seed_all(args.seed)

        if not args.rnnlm:
            if not args.test:
                # Train ASR
                from asr.solver import Trainer as Solver
            else:
                # Test ASR
                from asr.solver import Tester as Solver
        else:
            # Train RNNLM
            from asr.solver import RNNLM_Trainer as Solver

        solver = Solver(config, args)
        solver.load_data()
        solver.set_model()
        solver.exec()

    # Eval
    else:
        # `pd` here is pandas (see the imports in Code Example #7);
        # the loop below shadows it with each predicted string.
        decode = pd.read_csv(args.file, sep='\t', header=None)
        truth = decode[0].tolist()
        pred = decode[1].tolist()
        cer = []
        wer = []
        for gt, pd in zip(truth, pred):
            wer.append(ed.eval(pd.split(' '), gt.split(' ')) / len(gt.split(' ')))
            cer.append(ed.eval(pd, gt) / len(gt))

        print('CER : {:.6f}'.format(sum(cer) / len(cer)))
        print('WER : {:.6f}'.format(sum(wer) / len(wer)))
        print('p.s. for phoneme sequences, WER=Phone Error Rate and CER is meaningless.')
Code Example #7
File: eval.py Project: Eman22S/Amharic-Seq2Seq
import editdistance as ed
import pandas as pd
import argparse

# Arguments
parser = argparse.ArgumentParser(description='Evaluate decoding result.')
parser.add_argument('--file', type=str, help='Path to decode result file.')
paras = parser.parse_args()

print(paras.file)
                        
decode = pd.read_csv(paras.file, header=None, sep='\t')
truth = decode[0].tolist()
pred = decode[1].tolist()
cer = []
wer = []
for gt, pd in zip(truth, pred):
    wer.append(ed.eval(pd.split(' '), gt.split(' ')) / len(gt.split(' ')))
    cer.append(ed.eval(pd, gt) / len(gt))

print('CER : {:.6f}'.format(sum(cer) / len(cer)))
print('WER : {:.6f}'.format(sum(wer) / len(wer)))
print('p.s. for phoneme sequences, WER=Phone Error Rate and CER is meaningless.')
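The script expects a two-column tab-separated file, reference in column 0 and hypothesis in column 1; a minimal smoke-test input might be:

# Write a tiny decode file, then run: python eval.py --file decode.tsv
with open('decode.tsv', 'w') as f:
    f.write('hello world\thello word\n')
    f.write('good morning\tgood morning\n')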
Code Example #8
        # ad_json=json.loads(ad)
        # answers=ad_json['answers'][0]
        # t = answers['answerContent']['crowd-semantic-segmentation']
        # imgData = t['labeledImage']['pngImageData']
        # img = base64.b64decode(imgData)

        # with open('C:/tmp/tmp_mask.png', 'wb') as f:
        #     f.write(img)
        # change 5: check whether the mask is empty via its pixel sum,
        # rather than testing `img != 0`
        if np.sum(io.imread('C:/tmp/tmp_mask.png')) != 0:
            pkey = 'sagetest/output/wakeforest-Subject02-1040Burn02ID395658/annotations/worker-response/iteration-1/0/2020-10-29_20:56:13.json'
            pseudo = s3Client.get_object(Bucket=bucket, Key=pkey)
            pseudo_stream = pseudo['Body']
            pseudo_data = pseudo_stream.read()
            pd = pseudo_data.decode('utf-8')
            # Each manifest line is "<iteration>\t<s3-uri>"
            lines = pd.split('\n')
            lines.remove('')
            pseudo_json = {}
            for line in lines:
                row = line.split('\t')
                pseudo_json.update({row[0]: row[1]})
            # `iteration` comes from the surrounding (elided) context
            uri = pseudo_json[str(iteration[0])]
            pseudo_name = uri.split('/')[-1]
            pname = pseudo_name.split('.')[0]
            # Pull the numeric fields out of a name like "img_3_1040"
            pint = [int(s) for s in pname.split('_') if s.isdigit()]
            study = pint[1]
            # metadata=smd_images_df[smd_images_df['Study']==int(study)]
            # meta_pseudo=metadata[metadata['S3_Location']=='PseudoColor/'+pseudo_name]
            # meta=meta_pseudo.iloc[[0]]
            # meta['ImageID']=str(rd.randint(1, 5000000))
            # meta['ImageType']='Truth'
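A standalone toy of the manifest parsing above, with made-up URIs:

# Made-up two-line manifest in the same '<iteration>\t<uri>' shape.
pd = '0\ts3://bucket/PseudoColor/img_1_101.png\n1\ts3://bucket/PseudoColor/img_2_202.png\n'
lines = pd.split('\n')
lines.remove('')
pseudo_json = dict(line.split('\t') for line in lines)
print(pseudo_json['1'].split('/')[-1])  # img_2_202.png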
Code Example #9
File: analysis_backlog.py Project: 0xzz/eb_dashboard
def estimate_wait_time(eb_type, pd, future_supply, future_so, backlog_dict):
    def get_backlog_before(eb_type, pd, backlog_dict):
        # Linearly interpolate the backlog series at the priority date
        pd = datetime.datetime.strptime(pd, '%Y-%m-%d').timestamp()
        all_pd = [
            datetime.datetime.strptime(p, '%m/%d/%Y').timestamp()
            for p in backlog_dict['date']
        ]
        all_back = backlog_dict[eb_type]
        b = np.interp(pd, all_pd, all_back)
        return int(b)

    if ('date' not in backlog_dict or future_supply <= 0 or not pd):
        return ''

    bl = get_backlog_before(eb_type, pd, backlog_dict)

    df_visa = load_gc_stats()

    clear_record = get_historical_clear(bl, pd, df_visa, eb_type)

    msg_list = []

    try:
        total_month = 0

        msg_list.append(
            f'There are {bl} {eb_type} green card demands in total ahead of your PD {pd} as of the date you filed your case.'
        )

        for i, cr in enumerate(clear_record):
            fy, cl, rm, new_month = (cr['fy'], cr['clear'], cr['remaining'],
                                     cr['lapsed_month'])
            total_month += new_month
            if i == 0:
                msg_list.append(
                    f'From {pd} to the end of FY{fy}, {cl} {eb_type} green card demands were cleared, {rm} were still remaining.'
                )
            else:
                msg_list.append(
                    f'During FY{fy}, {cl} {eb_type} green card demands were cleared, {rm} were still remaining.'
                )

        if (len(clear_record) == 0):
            remaining_bl = bl
            additional_wait_starting_date = pd
        else:
            remaining_bl = clear_record[-1]['remaining']
            additional_wait_starting_date = '2019-10-1'  # start of FY2020

        wait_time = remaining_bl / (future_supply + future_so)
        wy = int(wait_time)
        wm = int(np.round((wait_time - wy) * 12.0))

        msg_list.append(
            f'Based on a future annual supply of {future_supply} and annual spillover of {future_so} for {eb_type}, the remaining backlog would need an additional {wy} years and {wm} months starting from {additional_wait_starting_date}.'
        )

        additional_wait_starting_yr = int(
            additional_wait_starting_date.split('-')[0])
        additional_wait_starting_mm = int(
            additional_wait_starting_date.split('-')[1])

        additional_month = wy * 12 + wm

        green_month = additional_wait_starting_mm + additional_month
        green_yr = additional_wait_starting_yr + int((green_month - 1) / 12.0)
        green_month = (green_month - 1) % 12 + 1
        msg_list.append(
            f'Your final action date is likely to become current at {green_yr}-{green_month:02d}.'
        )

        pd_yr = int(pd.split('-')[0])
        pd_mm = int(pd.split('-')[1])

        total_month = (green_yr - pd_yr) * 12 + (green_month - pd_mm)
        total_yr = int(total_month / 12)
        total_month -= total_yr * 12
        msg_list.append(
            f'Your total wait time is {total_yr} years and {total_month} months after {pd}.'
        )

        msg = '\n\n'.join(msg_list)

    except Exception:
        msg = 'Incorrect Input'

    return msg
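The backlog lookup boils down to np.interp over date timestamps; a self-contained illustration with invented snapshots:

import datetime
import numpy as np

# Two invented backlog snapshots a year apart.
dates = ['10/01/2018', '10/01/2019']
backlog = [50000, 60000]
xs = [datetime.datetime.strptime(d, '%m/%d/%Y').timestamp() for d in dates]
pd_ts = datetime.datetime.strptime('2019-04-01', '%Y-%m-%d').timestamp()
# Roughly half the year has elapsed, so the estimate lands near 55000.
print(int(np.interp(pd_ts, xs, backlog)))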
Code Example #10
 def call_estimate_wait_time(eb_type, pd, future_supply, future_so,
                             backlog_dict):
     pd = pd.split(' ')[0]
     return estimate_wait_time(eb_type, pd, future_supply, future_so,
                               backlog_dict)
Code Example #11
    def parse_platefile_synergy_neo(self, sep='\t'):
        """
        Extracts data from a platefile

        Data includes number of plates, assay types, plate names, number of well
        rows and cols.
        """
        if sep != '\t':
            raise PlateFileUnknownFormat('Synergy Neo can only be parsed as '
                                         'tab-separated')

        self.file_format = 'Synergy Neo'
        pd = self.plate_file.read()
        try:
            pd = self._str_universal_newlines(pd.decode('utf-8'))
        except UnicodeDecodeError:
            raise PlateFileUnknownFormat('Error opening file with UTF-8 '
                                         'encoding (does file contain '
                                         'non-standard characters?)')

        plates = pd.split('Field Group\n\nBarcode:')

        if len(plates) == 1:
            plates = pd.split('Barcode\n\nBarcode:')
            if len(plates) == 1:
                raise PlateFileUnknownFormat('File does not appear to be in '
                                             'Synergy Neo format')

        self._create_db_platefile()

        well_measurements = []

        for p in plates:
            if len(p.strip()) == 0:
                continue
            barcode_and_rest = p.split('\n', 1)
            barcode = barcode_and_rest[0].strip()

            plate_and_timepoint = self.extract_plate_and_timepoint(barcode)

            if plate_and_timepoint is None:
                raise PlateFileParseException('Unable to parse timepoint for '
                                              'barcode {} or from plate file '
                                              'name'.format(barcode))

            plate_name = plate_and_timepoint['plate']
            plate_timepoint = plate_and_timepoint['timepoint']

            plate = self._plate_objects.get(plate_name, None)

            # Each plate can have multiple assays
            assays = re.split(r'\n\s*\n', barcode_and_rest[1])

            for a in assays:
                a_strp = a.strip()
                if len(a_strp) == 0:
                    continue

                well_lines = a.split('\n')
                assay_name = well_lines[0].strip()
                well_cols = len(well_lines[1].split())
                # Minus 2: One for assay name, one for column headers
                well_rows = len(well_lines) - 2

                if plate is None:
                    plate = self._get_or_create_plate(plate_name, well_cols,
                                                      well_rows)

                # Check plate dimensions are as expected
                if well_cols != plate.width:
                    raise PlateFileParseException(
                        'Unexpected plate width on '
                        'plate with barcode {} '
                        '(expected: {}, got: {})'.format(
                            barcode, plate.width, well_cols))

                if well_rows != plate.height:
                    raise PlateFileParseException(
                        'Unexpected plate height on '
                        'plate with barcode {} '
                        '(expected: {}, got: {})'.format(
                            barcode, plate.height, well_rows))

                well_id = 0
                for row in range(2, len(well_lines)):
                    for val in well_lines[row].split('\t')[1:-1]:
                        well_measurements.append(
                            WellMeasurement(
                                well_id=self._well_sets[plate.id][well_id],
                                timepoint=plate_timepoint,
                                assay=assay_name,
                                value=float(val)))
                        well_id += 1

        if not well_measurements:
            raise PlateFileParseException('File contains no readable '
                                          'plates')
        try:
            WellMeasurement.objects.bulk_create(well_measurements)
        except IntegrityError:
            raise PlateFileParseException('A file with the same plate, '
                                          'assay and time points has been '
                                          'uploaded to this dataset before')

        # Update modified_date
        self.dataset.save()
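The assay split in parse_platefile_synergy_neo relies on blank lines between blocks; a toy demonstration of that regex:

import re

# Two toy assay blocks separated by a blank line (not real Synergy Neo output).
block = 'Assay A\n1\t2\n3\t4\n\nAssay B\n5\t6\n7\t8'
print(re.split(r'\n\s*\n', block))
# ['Assay A\n1\t2\n3\t4', 'Assay B\n5\t6\n7\t8']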