Example #1
    def getCheetahImg(self, calib=None):
        """Converts seg, row, col assuming (32,185,388)
           to cheetah 2-d table row and col (8*185, 4*388)
        """
        img = None
        if 'cspad2x2' in self.detInfo.lower():
            print("Not implemented yet: cspad2x2")
        elif 'cspad' in self.detInfo.lower():
            if calib is None:
                calib = self.det.calib(self.evt)  # (32,185,388)
                if calib is None:
                    return None
            img = np.zeros((8 * 185, 4 * 388))
            try:
                img = utils.pct(calib)
            except Exception:
                pass  # keep the zero-filled image if the conversion fails
        elif 'rayonix' in self.detInfo.lower():
            if calib is None:
                calib = self.det.calib(self.evt)
                if calib is None:
                    return None
            img = np.squeeze(calib)  # (1920,1920)
        return img
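
For reference, a minimal sketch of what utils.pct presumably does for a CSPAD detector: tile the psana calib array (32, 185, 388) into the Cheetah 2-D layout (8*185, 4*388). The quadrant-major segment ordering is an assumption, and the signature varies across these examples (with or without a leading detInfo argument).

import numpy as np

def cspad_to_cheetah(calib):
    # Tile 32 CSPAD segments into 8 rows x 4 columns of (185, 388) tiles.
    img = np.zeros((8 * 185, 4 * 388), dtype=calib.dtype)
    counter = 0
    for quad in range(4):
        for seg in range(8):
            img[seg * 185:(seg + 1) * 185,
                quad * 388:(quad + 1) * 388] = calib[counter]
            counter += 1
    return img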
Example #2
    def saveCheetahFormat(self, arg):
        dim0, dim1 = utils.getCheetahDim(self.parent.detInfo)

        if dim0 > 0:
            maxNumPeaks = 2048
            if self.parent.index.hiddenCXI is not None and self.peaks.shape[0] >= self.minPeaks:
                myHdf5 = h5py.File(self.parent.index.hiddenCXI, 'w')
                grpName = "/entry_1/result_1"
                dset_nPeaks = "/nPeaks"
                dset_posX = "/peakXPosRaw"
                dset_posY = "/peakYPosRaw"
                dset_atot = "/peakTotalIntensity"
                if grpName in myHdf5:
                    del myHdf5[grpName]
                myHdf5.create_group(grpName)
                myHdf5.create_dataset(grpName + dset_nPeaks, (1,), dtype='int')
                myHdf5.create_dataset(grpName + dset_posX, (1, maxNumPeaks), dtype='float32', chunks=(1, maxNumPeaks))
                myHdf5.create_dataset(grpName + dset_posY, (1, maxNumPeaks), dtype='float32', chunks=(1, maxNumPeaks))
                myHdf5.create_dataset(grpName + dset_atot, (1, maxNumPeaks), dtype='float32', chunks=(1, maxNumPeaks))

                myHdf5.create_dataset("/LCLS/detector_1/EncoderValue", (1,), dtype=float)
                myHdf5.create_dataset("/LCLS/photon_energy_eV", (1,), dtype=float)
                dset = myHdf5.create_dataset("/entry_1/data_1/data", (1, dim0, dim1), dtype=float)
                dsetM = myHdf5.create_dataset("/entry_1/data_1/mask", (dim0, dim1), dtype='int')

                # Convert calib image to cheetah image
                img = utils.pct(self.parent.detInfo, self.parent.calib)
                mask = utils.pct(self.parent.detInfo, self.parent.mk.combinedMask)
                dset[0, :, :] = img
                dsetM[:, :] = mask

                peaks = self.peaks.copy()
                nPeaks = peaks.shape[0]

                if nPeaks > maxNumPeaks:
                    peaks = peaks[:maxNumPeaks]
                    nPeaks = maxNumPeaks

                segs = peaks[:, 0]
                rows = peaks[:, 1]
                cols = peaks[:, 2]
                atots = peaks[:, 5]
                cheetahRows, cheetahCols = utils.convert_peaks_to_cheetah(self.parent.detInfo, segs, rows, cols)
                myHdf5[grpName + dset_posX][0, :nPeaks] = cheetahCols
                myHdf5[grpName + dset_posY][0, :nPeaks] = cheetahRows
                myHdf5[grpName + dset_atot][0, :nPeaks] = atots
                #for i, peak in enumerate(peaks):
                #    seg, row, col, npix, amax, atot, rcent, ccent, rsigma, csigma, rmin, rmax, cmin, cmax, bkgd, rms, son = peak[0:17]
                #    cheetahRow, cheetahCol = utils.convert_peaks_to_cheetah(self.parent.detInfo, seg, row, col)
                #    myHdf5[grpName + dset_posX][0, i] = cheetahCol
                #    myHdf5[grpName + dset_posY][0, i] = cheetahRow
                #    myHdf5[grpName + dset_atot][0, i] = atot
                myHdf5[grpName + dset_nPeaks][0] = nPeaks

                if self.parent.args.v >= 1: print("hiddenCXI clen (mm): ", self.parent.clen * 1000.)
                myHdf5["/LCLS/detector_1/EncoderValue"][0] = self.parent.clen * 1000.  # mm
                myHdf5["/LCLS/photon_energy_eV"][0] = self.parent.photonEnergy
                myHdf5.close()
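
A hedged sketch of utils.convert_peaks_to_cheetah for the CSPAD case, consistent with the tiling sketch above (segment s lands in quadrant s // 8, slot s % 8); the detector-info argument is omitted here and the layout is an assumption.

import numpy as np

def convert_peaks_to_cheetah_cspad(segs, rows, cols):
    # Map psana (seg, row, col) peak coordinates to Cheetah 2-D coordinates.
    segs = np.asarray(segs, dtype=int)
    cheetah_rows = (segs % 8) * 185 + np.asarray(rows)
    cheetah_cols = (segs // 8) * 388 + np.asarray(cols)
    return cheetah_rows, cheetah_cols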
Example #3
def df_to_records(df):
    # convert a dataframe pages subset to list of dicts
    # for use in templates
    key_metrics = [
        'Views', 'Visitors', 'Engaged minutes', 'Social interactions'
    ]
    device_cols = ['Desktop views', 'Mobile views', 'Tablet views']
    referral_cols = [
        'Search refs', 'Internal refs', 'Direct refs', 'Social refs',
        'Other refs', 'Fb refs', 'Tw refs'
    ]
    records = df.to_dict(orient='records')
    for record in records:
        record['Title'] = record['Title'].title().replace(
            '\'S ', '’s ').replace('\'T ', '’t ').replace('’S', '’s').replace(
                '’T', '’t').replace('’M', '’m').replace('Hsr', 'HSR').replace(
                    'Ndp', 'NDP').replace('Mcmaster', 'McMaster').replace(
                        'Lrt', 'LRT').replace('Cfl', 'CFL')
        record['Title'] = record['Title'][:72] + \
            (record['Title'][72:] and '...')
        for item in device_cols + referral_cols:
            record[f'''{item}%'''] = u.pct(record[item], record['Views'])
        record['Returning vis.%'] = u.pct(record['Returning vis.'],
                                          record['Visitors'])
        time = round((record['Avg. time']), 2)
        mins = int(time)
        seconds = int((time - mins) * 60)
        record['Avg. time formatted'] = f'''{mins}:{seconds:02d}'''
        # referrers string
        temp = []
        referrers = [('search', record['Search refs%']),
                     ('direct', record['Direct refs%']),
                     ('other', record['Other refs%']),
                     ('internal', record['Internal refs%']),
                     ('Tw', record['Tw refs%']), ('FB', record['Fb refs%'])]
        for item in sorted(referrers, key=lambda x: x[1], reverse=True):
            if item[1] > 9:
                temp.append(f'''{item[1]}% {item[0]}''')
        if temp:  # guard: no referrer may clear the 9% cutoff
            temp[0] = f'''<b>{temp[0]}</b>'''
        record['Referrers report'] = ", ".join(temp)
        # devices string
        temp = []
        devices_pv = [
            ('mobile', record['Mobile views%']),
            ('desktop', record['Desktop views%']),
            ('tablet', record['Tablet views%']),
        ]
        for item in sorted(devices_pv, key=lambda x: x[1], reverse=True):
            if item[1] > 9:
                temp.append(f'''{item[1]}% {item[0]}''')
        if temp:  # guard: no device may clear the 9% cutoff
            temp[0] = f'''<b>{temp[0]}</b>'''
        record['Devices report'] = ", ".join(temp)
    return {'the_list': records}
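
The u.pct helper appears throughout Examples #3 and #7-#9 but is never shown; a plausible stand-in is below. The one-decimal rounding and the zero-denominator guard are assumptions.

def pct(part, whole):
    # Percentage of part over whole, rounded to one decimal place.
    return round(100.0 * part / whole, 1) if whole else 0.0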
Example #4
    def saveCheetahFormatMask(self, run, arg):
        if arg == self.parent.facilityLCLS:
            if 'cspad' in self.parent.detInfo.lower():
                dim0 = 8 * 185
                dim1 = 4 * 388
            elif 'rayonix' in self.parent.detInfo.lower() and (
                    'mfx' in self.parent.experimentName
                    or 'xpp' in self.parent.experimentName):
                dim0 = 1920
                dim1 = 1920
            else:
                print("saveCheetahFormatMask not implemented")
                return

            fname = self.parent.index.outDir + '/r' + str(run).zfill(
                4) + '/staticMask.h5'
            print "Saving static mask in Cheetah format: ", fname
            myHdf5 = h5py.File(fname, 'w')
            dset = myHdf5.create_dataset('/entry_1/data_1/mask', (dim0, dim1),
                                         dtype='int')

            # Convert calib image to cheetah image
            if self.parent.mk.combinedMask is None:
                img = np.ones((dim0, dim1))
            else:
                img = np.zeros((dim0, dim1))
                counter = 0
                if 'cspad' in self.parent.detInfo.lower():
                    img = utils.pct(self.parent.mk.combinedMask)
                elif 'rayonix' in self.parent.detInfo.lower() and (
                        'mfx' in self.parent.experimentName
                        or 'xpp' in self.parent.experimentName):
                    img = self.parent.mk.combinedMask[counter, :, :]  # psana format
            dset[:, :] = img
            myHdf5.close()
        elif arg == self.parent.facilityPAL:
            print "static mask not implemented for PAL"
            (dim0, dim1) = (2880, 2880)  # FIXME: read from geom file
            fname = self.parent.index.outDir + '/r' + str(run).zfill(
                4) + '/staticMask.h5'
            print "Saving static mask in Cheetah format: ", fname
            myHdf5 = h5py.File(fname, 'w')
            dset = myHdf5.create_dataset('/entry_1/data_1/mask', (dim0, dim1),
                                         dtype='int')
            img = np.ones((dim0, dim1))
            dset[:, :] = img
            myHdf5.close()
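
To sanity-check a mask written by saveCheetahFormatMask, the dataset can be read back with plain h5py; the filename below is a placeholder for whatever fname was built above.

import h5py

with h5py.File('staticMask.h5', 'r') as f:
    mask = f['/entry_1/data_1/mask'][()]  # full (dim0, dim1) array
    print(mask.shape, mask.dtype, mask.min(), mask.max())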
Example #5
    def saveCheetahStaticMask(self):
        # Save cheetah format mask
        if self.parent.facility == self.parent.facilityLCLS:
            if 'cspad' in self.parent.detInfo.lower():# and 'cxi' in self.parent.experimentName:
                dim0 = 8 * 185
                dim1 = 4 * 388
            elif 'rayonix' in self.parent.detInfo.lower():# and 'mfx' in self.parent.experimentName:
                dim0 = 1920
                dim1 = 1920
            #elif 'rayonix' in self.parent.detInfo.lower() and 'xpp' in self.parent.experimentName:
            #    dim0 = 1920
            #    dim1 = 1920
            else:
                print "#####################################"
                print "saveCheetahFormatMask not implemented"
                print "#####################################"
                return

            _tag = self.parseMaskTag()
            fname = self.parent.psocakeRunDir + "/staticMask"+_tag+".h5"
            print "Saving static mask in Cheetah format: ", fname
            myHdf5 = h5py.File(fname, 'w')
            dset = myHdf5.create_dataset('/entry_1/data_1/mask', (dim0, dim1), dtype='int')

            # Convert calib image to cheetah image
            if self.parent.mk.combinedMask is None:
                img = np.ones((dim0, dim1))
            else:
                img = np.zeros((dim0, dim1))
                counter = 0
                if 'cspad' in self.parent.detInfo.lower():# and 'cxi' in self.parent.experimentName:
                    img = utils.pct(self.parent.mk.combinedMask)
                elif 'rayonix' in self.parent.detInfo.lower():# and 'mfx' in self.parent.experimentName:
                    img = self.parent.mk.combinedMask[counter, :, :]  # psana format
            dset[:, :] = img
            myHdf5.close()
        elif self.parent.facility == self.parent.facilityPAL:
            (dim0, dim1) = self.parent.calib.shape
            fname = self.parent.psocakeRunDir + "/staticMask.h5"
            print "Saving static mask in Cheetah format: ", fname
            myHdf5 = h5py.File(fname, 'w')
            dset = myHdf5.create_dataset('/entry_1/data_1/mask', (dim0, dim1), dtype='int')

            # Convert calib image to cheetah image
            if self.parent.mk.combinedMask is None:
                img = np.ones((dim0, dim1))
            else:
                img = self.parent.mk.combinedMask
            dset[:, :] = img
            myHdf5.close()
Example #6
    def saveCheetahStaticMask(self):
        dim0, dim1 = utils.getCheetahDim(self.parent.detInfo)
        _tag = self.parseMaskTag()
        fname = self.parent.psocakeRunDir + "/staticMask"+_tag+".h5"
        print("Saving Cheetah static mask in: ", fname)
        myHdf5 = h5py.File(fname, 'w')
        dset = myHdf5.create_dataset('/entry_1/data_1/mask', (dim0, dim1), dtype='int')

        # Convert calib image to cheetah image
        if self.parent.mk.combinedMask is None:
            img = np.ones((dim0, dim1))
        else:
            img = utils.pct(self.parent.detInfo, self.parent.mk.combinedMask)
        dset[:, :] = img
        myHdf5.close()
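
utils.getCheetahDim is the refactored lookup behind Examples #2 and #6; a sketch based on the branches in Examples #4 and #5 is below. The (-1, -1) sentinel matches the `if dim0 > 0` check in Example #2 but is otherwise an assumption.

def getCheetahDim(detInfo):
    # Map a detector name to its Cheetah 2-D table dimensions.
    detInfo = detInfo.lower()
    if 'cspad' in detInfo:
        return 8 * 185, 4 * 388
    elif 'rayonix' in detInfo:
        return 1920, 1920
    return -1, -1  # unknown detector; callers test dim0 > 0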
Example #7
def parse_sections_csv(dflt):
    section_limit = 15
    site = dflt['site']
    df = read_csv(filename=f'''{site}-sections.csv''',
                  folders=['data', dflt['freq']],
                  cols_to_keep=c.var['sections_cols_keep'])
    # MODIFY DATAFRAME
    views_sum = df['Views'].sum()
    visitors_sum = df['Visitors'].sum()
    df['PV / post'] = round(df['Views'] / df['Posts'], 0)
    df['Avg. Time'] = round(df['Engaged minutes'] / df['Visitors'], 3)
    # Create record, add to list
    the_list = []
    for record in df.head(section_limit).to_dict(orient='records'):
        obj = {}
        obj['Section'] = record['Section']
        obj['Posts'] = record['Posts']
        obj['PV %'] = u.pct(record['Views'], views_sum)
        obj['PV / Post'] = int(record['PV / post'])
        obj['UV %'] = u.pct(record['Visitors'], visitors_sum)
        obj['Returning Vis%'] = u.pct(record['Returning vis.'],
                                      record['Visitors'])
        time = record['Avg. Time']
        mins = int(time)
        seconds = int(round((time - mins) * 60, 0))
        obj['Avg. time'] = f'''{mins}:{seconds:02d}'''
        obj['Search %'] = u.pct(record['Search refs'], record['Views'])
        obj['Internal %'] = u.pct(record['Internal refs'], record['Views'])
        obj['Social %'] = u.pct(record['Social refs'], record['Views'])
        obj['Direct %'] = u.pct(record['Direct refs'], record['Views'])
        obj['Other %'] = u.pct(record['Other refs'], record['Views'])
        the_list.append(obj)
    # pprint.pprint(the_list)
    return template(
        'top_sections.html',
        data={'the_list': the_list},
    )
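
Examples #7-#9 share a read_csv wrapper that is not shown; a plausible stand-in follows. The folder layout and column filtering are assumptions.

import os
import pandas as pd

def read_csv(filename, folders, cols_to_keep):
    # Load a CSV from a nested data folder and keep only the requested columns.
    path = os.path.join(*folders, filename)
    df = pd.read_csv(path)
    return df[cols_to_keep]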
Example #8
def site_parse(dflt):
    site = dflt['site']
    freq = dflt['freq']
    period = dflt['config'][freq]['period']
    key_metrics = [
        'Views', 'Visitors', 'New Posts', 'Engaged minutes',
        'Social interactions'
    ]
    device_cols = ['Desktop views', 'Mobile views', 'Tablet views']
    referral_cols = [
        'Search refs', 'Internal refs', 'Direct refs', 'Social refs',
        'Other refs', 'Fb refs', 'Tw refs'
    ]
    df = read_csv(filename=f'''{site}-site-2019.csv''',
                  folders=['data', freq],
                  cols_to_keep=dflt['config']['site_cols_keep'])
    # ---- FIX DATAFRAME --------
    # fix any columns that may have float data types but should be integers
    for item in key_metrics + device_cols + referral_cols:
        df[item] = df[item].apply(lambda x: int(x))
    # sort ascending on Date
    # so for the latest data we'll pull 'last' or tail items
    df = df.sort_values(by=['Date'])
    # ---- MODIFY DATAFRAME --------
    if freq == 'daily':
        # get day of week of each row,
        # so I can compare to other same days
        df['Date'] = pd.to_datetime(df['Date'])
        df['DayOfWeek'] = df['Date'].dt.day_name()
        # get a list of all of this weekday's page views, sorted,
        # then get the index of this period's page views in that list: that's the rank.
    # export to CSV for testing-validation
    df.to_csv('site-test.csv')
    data = {}
    # get key metrics for LATEST, vs Period, vs ALL
    new = df.tail(1)
    roll_avg = df.tail(period)
    if freq == 'daily':
        this_day = new['DayOfWeek'].values[0]
        roll_avg = roll_avg[roll_avg['DayOfWeek'] == this_day]
        all_rows = df[df['DayOfWeek'] == this_day]
    else:
        roll_avg = df.tail(period)
        all_rows = df
    this_pv = new['Views'].values[0]
    for item in key_metrics:
        data[item] = new[item].values[0]
        # print(f'{item[0 ]} RM value ', total[item[1]].mean())
        data[f"{item + ' vs rm%'}"] = u.vs_rm_pct(new[item].values[0],
                                                  roll_avg[item].mean())
    # ---COMPARE THIS PERIOD TO OTHERS
    # by period and total
    roll_avg_pv_list = sorted(list(roll_avg['Views'].values), reverse=True)
    roll_avg_count = len(roll_avg_pv_list)
    print("roll avg count: ", roll_avg_count)
    this_pv_roll_avg_rank = (roll_avg_pv_list.index(this_pv)) + 1
    all_pv_list = sorted(list(all_rows['Views'].values), reverse=True)
    all_count = len(all_pv_list)
    this_pv_all_rank = (all_pv_list.index(this_pv)) + 1
    if freq == 'daily':
        data['Views rank'] = (
            f'''Was {ordinal(this_pv_roll_avg_rank)} best '''
            f'''{this_day} in last {roll_avg_count}, '''
            f'''{ordinal(this_pv_all_rank)} best in last {all_count}''')
    else:
        data['Views rank'] = (
            f'''Was {ordinal(this_pv_roll_avg_rank)} best '''
            f'''{dflt['config'][freq]['term']} in last {roll_avg_count}, '''
            f'''{ordinal(this_pv_all_rank)} best in last {all_count}''')
    # Get period avg, so I can use for 'key changes'
    # in report. ie if a change is > 5% of rm
    data['Views rm'] = round(roll_avg['Views'].mean(), 0)
    # get percentages of key metrics
    for item in device_cols + referral_cols:
        # What's the ratio of this stat to latest period's views?
        data[f'''{item + '%'}'''] = u.pct(new[item].values[0],
                                          new['Views'].values[0])
        # difference between new stat and period avg stat
        data[f'''{item + ' diff vs rm'}'''] = new[item].values[0] - \
            round(roll_avg[item].mean(), 0)
        # percentage of difference between new stat and roll_avg stat
        data[f'''{item + ' vs rm%'}'''] = u.vs_rm_pct(new[item].values[0],
                                                      roll_avg[item].mean())

    # percentage of visitors who are returning
    data['Returning vis.%'] = u.pct(new['Returning vis.'].values[0],
                                    new['Visitors'].values[0])
    # percentage of difference between new 'Returning vis.' and period avg
    data['Returning vis. vs rm%'] = u.vs_rm_pct(
        new['Returning vis.'].values[0], roll_avg['Returning vis.'].mean())
    # get avg time on site (in decimal format; can convert to mm:ss in report)
    data['site time dec'] = round(data['Engaged minutes'] / data['Visitors'],
                                  2)
    time = round((data['site time dec']), 2)
    mins = int(time)
    seconds = int((time - mins) * 60)
    data['site time formatted'] = f'''{mins}:{seconds:02d}'''
    data['site time dec vs rm%'] = u.vs_rm_pct(
        data['site time dec'],
        roll_avg['Engaged minutes'].mean() / roll_avg['Visitors'].mean())
    # produce devices breakdown report string
    devices = [('mobile', data['Mobile views%']),
               ('desktop', data['Desktop views%']),
               ('tablet', data['Tablet views%'])]
    temp = []
    for item in sorted(devices, key=lambda x: x[1], reverse=True):
        temp.append(f'''{item[1]}% {item[0]}''')
    data['Devices report'] = ", ".join(temp)
    # produce views breakdown report string
    referrers = [
        ('social', data['Social refs%']),
        ('search', data['Search refs%']),
        ('internal', data['Internal refs%']),
        ('direct', data['Direct refs%']),
        ('other', data['Other refs%']),
    ]
    temp = []
    for item in sorted(referrers, key=lambda x: x[1], reverse=True):
        temp.append(f'''{item[1]}% {item[0]}''')
    data['Referrers report'] = ", ".join(temp)
    # produce referral changes
    referral_changes = [
        ('search', data['Search refs diff vs rm']),
        ('internal', data['Internal refs diff vs rm']),
        ('direct', data['Direct refs diff vs rm']),
        ('other', data['Other refs diff vs rm']),
        ('FB', data['Fb refs diff vs rm']),
        ('Tw', data['Tw refs diff vs rm']),
    ]
    temp = []
    for item in sorted(referral_changes, key=lambda x: x[1], reverse=True):
        if abs(item[1]) > (0.01 * data['Views rm']):
            if item[1] < 0:
                temp.append(
                    (f'''<span style="color: #8B0000; font-weight: 700;">'''
                     f'''{(u.humanize(value=item[1]))}</span> {item[0]}'''))
            else:
                temp.append(
                    (f'''<span style="color: #006400; font-weight: 700;">'''
                     f'''{u.humanize(value=item[1], sign=True)}'''
                     f'''</span> {item[0]}'''))
    data['Referrers change report'] = ", ".join(temp)
    # pprint.pprint(data)

    return template(
        'site_highlights.html',
        data=data,
        site=site,
        freq=freq,
        period=period,
        term=dflt['config'][freq]['term'],
    )
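
Two helpers used above, u.vs_rm_pct and ordinal, are not shown in these examples; hedged sketches follow. The sign convention, rounding, and zero guard are assumptions.

def vs_rm_pct(new, rolling_mean):
    # Percent change of the latest value against its rolling mean.
    if not rolling_mean:
        return 0.0
    return round(100.0 * (new - rolling_mean) / rolling_mean, 1)

def ordinal(n):
    # 1 -> '1st', 2 -> '2nd', 3 -> '3rd', 11 -> '11th', 21 -> '21st'
    if 11 <= n % 100 <= 13:
        return f'{n}th'
    suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return f'{n}{suffix}'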
Example #9
def pages_parse(dflt):
    data = {}
    the_html = ''
    # dflt -> dict
    '''
        'site': site,
        'freq': freq,
        'defaults': c.var[site][freq]
    '''
    # get needed dataframes
    # parse needed dataframes
    # call template with data
    # return html
    freq = dflt['freq']
    site = dflt['site']
    df = read_csv(filename=f'''{site}-pages.csv''',
                  folders=['data', freq],
                  cols_to_keep=dflt['config']['pages_cols_keep'])
    key_metrics = [
        'Views', 'Visitors', 'Engaged minutes', 'Social interactions'
    ]
    device_cols = ['Desktop views', 'Mobile views', 'Tablet views']
    referral_cols = [
        'Search refs', 'Internal refs', 'Direct refs', 'Social refs',
        'Other refs', 'Fb refs', 'Tw refs'
    ]
    # ---- FIX DATAFRAME --------
    # fix any columns that may have float data types but should be integers
    for item in key_metrics + device_cols + referral_cols:
        df[item] = df[item].apply(lambda x: int(x))
    # fix any empty columns that should have strings but are 0
    for item in ['URL', 'Title', 'Publish date', 'Authors', 'Section', 'Tags']:
        df[item] = df[item].apply(lambda x: x if x != 0 else 'none')
    # ---- MODIFY DATAFRAME --------
    # add asset ids if relevant to page
    df['asset id'] = df['URL'].apply(
        lambda x: (re.search(r'.*(\d{7})-.*', x)).group(1)
        if re.search(r'.*(\d{7})-.*', x) else 'none')
    # obits have Pub date and Asset ID, but Title == none
    # index pages have Title but Pub date == none and Asset ID == none
    # Ha! Events have Asset ID AND Title, but Pub date = none
    # static assets are here, with -static in URL
    # true article pages have title != none, Asset Id != none
    # Filter df just for articles
    df_articles_temp = df[(df['asset id'] != 'none') & (df['Title'] != 'none')
                          & (df['Publish date'] != 'none') &
                          ~df['URL'].str.contains('-static')]
    # sort by pub date for following aggregate functions
    # print(df_articles_temp[df_articles_temp['Publish date'] == 0])
    df_articles_temp = df_articles_temp.sort_values(by=['Publish date'],
                                                    ascending=False)
    # aggregate article data by asset ID
    aggregation_functions = {
        'URL': 'first',
        'Title': 'first',
        'Publish date': 'last',
        'Authors': 'first',
        'Section': 'first',
        'Tags': 'first',
        'Visitors': 'sum',
        'Views': 'sum',
        'Engaged minutes': 'sum',
        'New vis.': 'sum',
        'Returning vis.': 'sum',
        'Desktop views': 'sum',
        'Mobile views': 'sum',
        'Tablet views': 'sum',
        'Search refs': 'sum',
        'Internal refs': 'sum',
        'Other refs': 'sum',
        'Direct refs': 'sum',
        'Social refs': 'sum',
        'Fb refs': 'sum',
        'Tw refs': 'sum',
        'Social interactions': 'sum',
        'Fb interactions': 'sum',
        'Tw interactions': 'sum',
        'asset id': 'first',
    }
    df_articles = df_articles_temp.groupby(
        df_articles_temp['asset id']).aggregate(aggregation_functions)
    # ADD MORE COLUMNS NEEDED FOR ARTICLES
    df_articles['Avg. time'] = round(
        df_articles['Engaged minutes'] / df_articles['Visitors'], 3)
    df_articles['Category'] = df_articles['Section'].apply(
        lambda x: x.split('|')[0] if x != 0 else 'none')
    df_articles['Subcategory'] = df_articles['Section'].apply(
        lambda x: x.split('|')[-1] if x != 0 else 'none')
    # convert tags category to string if not already
    df_articles['Tags'] = df_articles['Tags'].apply(lambda x: x
                                                    if x != 0 else 'none')
    # -- GET TOP ARTICLES BY PAGE VIEWS
    # sort by page views
    df_articles = df_articles.sort_values(by=['Views'], ascending=False)
    # export to CSV for testing
    df_articles.to_csv('articles.csv')
    # CHANGE 10 IF MORE/FEWER TOP ARTICLES WANTED
    limit = {'daily': 7, 'weekly': 10, 'monthly': 10}[freq]
    top_articles = df_articles.head(limit)
    the_html = template(
        'top_articles_by_pv.html',
        data=df_to_records(top_articles),
    )
    # -- GET TOP ARTICLES IN SECTIONS OPINION, LIVING, WHATSON, SPORTS
    opinion_pages = df_articles[(df_articles['Tags'].str.contains('opinion'))
                                & ~df_articles['Tags'].str.contains(
                                    'sports|living|whatson|subcategory:news',
                                    regex=True)].head(3)
    # handle top articles with only opinion as the category
    data = df_to_records(opinion_pages)
    the_html += template(
        'top_articles_by_section.html',
        data=data,
        section='opinion',
    )
    sections = ['living', 'whatson', 'sports']
    for section in sections:
        # CHANGE 3 IF DIFFERENT NUMBER OF ARTICLES WANTED
        section_pages = df_articles[df_articles['Tags'].str.contains(
            section)].head(3)
        print('SECTION IS: ', section)
        # pprint.pprint(data)
        the_html += template(
            'top_articles_by_section.html',
            data=df_to_records(section_pages),
            section=section,
        )
    # -- GET TOP ARTICLES BY ENGAGED TIME
    visitor_limit = 200
    df_articles = df_articles.sort_values(by=['Avg. time'], ascending=False)
    long_reads = df_articles[df_articles['Visitors'] > visitor_limit].head(5)
    the_html += template('top_articles_by_section.html',
                         data=df_to_records(long_reads),
                         section='time',
                         visitor_limit=visitor_limit)
    # -- GET TOP LOCAL ARTICLES BY ENGAGED TIME
    #   long_reads = df_articles[df_articles['Tags'].str.contains(paper)].head(10)
    # -- GET HOME PAGE STATS
    url = dflt['config']['home'][site]

    try:
        df_hp = df[df['URL'] == url]
        # *** need to access site csv here
        df_site = read_csv(filename=f'''{site}-site-2019.csv''',
                           folders=['data', freq],
                           cols_to_keep=dflt['config']['site_cols_keep'])
        df_site = df_site.sort_values(by=['Date'], ascending=False)
        pv_total = df_site.tail(1)['Views'].values[0]
        time = round(
            (df_hp['Engaged minutes'].values[0] / df_hp['Visitors'].values[0]),
            2)
        mins = int(time)
        seconds = int((time - mins) * 60)
        data_hp = {
            'avg time':
            f'''{mins}:{seconds:02d}''',
            'pv':
            df_hp['Views'].values[0],
            'pv vs total':
            u.pct(df_hp['Views'].values[0], pv_total),
            'uv':
            df_hp['Visitors'].values[0],
            'min':
            df_hp['Engaged minutes'].values[0],
            'returning uv%':
            u.pct(df_hp['Returning vis.'].values[0],
                  df_hp['Visitors'].values[0]),
            'mobile pv':
            df_hp['Mobile views'].values[0],
            'desktop pv':
            df_hp['Desktop views'].values[0],
            'tablet pv':
            df_hp['Tablet views'].values[0],
            'mobile pv%':
            u.pct(df_hp['Mobile views'].values[0], df_hp['Views'].values[0]),
            'desktop pv%':
            u.pct(df_hp['Desktop views'].values[0], df_hp['Views'].values[0]),
            'tablet pv%':
            u.pct(df_hp['Tablet views'].values[0], df_hp['Views'].values[0]),
            'search pv%':
            u.pct(df_hp['Search refs'].values[0], df_hp['Views'].values[0]),
            'direct pv%':
            u.pct(df_hp['Direct refs'].values[0], df_hp['Views'].values[0]),
            'internal pv%':
            u.pct(df_hp['Internal refs'].values[0], df_hp['Views'].values[0]),
            'social pv%':
            u.pct(df_hp['Social refs'].values[0], df_hp['Views'].values[0]),
            'other pv%':
            u.pct(df_hp['Other refs'].values[0], df_hp['Views'].values[0]),
        }
    except Exception:
        # home-page row missing or malformed; fall back to 'NA'
        data_hp = 'NA'
    the_html += template('home_page.html',
                         data=data_hp,
                         inputs=c.var['inputs'][site],
                         freq=freq)
    return the_html