Example 1
    def __init__(self, dbconn=None, **kw):
        HTMLWriter.__init__(self)

        self.master = MasterDB(dbconn=dbconn) 
        # self.master.db.get_cursor()
        self.dbconn = self.master.db.conn
        
        self.kw = {'form_pv': '', 'pv': '', 'inst_id': -1, 'submit': ''}
        self.kw.update(kw)
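
The constructor seeds self.kw with a full set of defaults and then folds the caller's keyword arguments over them, so any key the caller supplies overrides its default while the rest keep theirs. A minimal standalone sketch of that pattern (the class name and the 'MY:PV' value are illustrative, not from the source):

class FormPage:
    # Illustrative stand-in: defaults first, then caller overrides win.
    def __init__(self, **kw):
        self.kw = {'form_pv': '', 'pv': '', 'inst_id': -1, 'submit': ''}
        self.kw.update(kw)

page = FormPage(pv='MY:PV', submit='Search')
print(page.kw['pv'], page.kw['inst_id'])  # -> MY:PV -1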
Example 2
    def __init__(self, dbconn=None, **kw):

        HTMLWriter.__init__(self)
        self.arch = Instruments(dbconn=dbconn)
        self.dbconn = self.arch.dbconn
        self.kw = {'station_sel': '', 'newstation': '',
                   'station': '', 'instrument': '', 'pv': '',
                   'station_add': '', 'inst_id': -1,
                   'submit': ''}
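
Example 2 repeats the defaults pattern but also re-exports the helper's connection (self.dbconn = self.arch.dbconn) so the page object and its database wrapper share a single handle. A hedged sketch of that sharing, with stand-in classes and sqlite3 used purely for illustration:

import sqlite3

class Instruments:
    # Stand-in for the real Instruments wrapper: owns or receives a connection.
    def __init__(self, dbconn=None):
        self.dbconn = dbconn if dbconn is not None else sqlite3.connect(':memory:')

class StationPage:
    def __init__(self, dbconn=None):
        self.arch = Instruments(dbconn=dbconn)
        # re-export the helper's connection so both objects share one handle
        self.dbconn = self.arch.dbconn

page = StationPage()
assert page.dbconn is page.arch.dbconn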
Example 3
import os

# StolenAuctionSoup, HTMLWriter, get_auction_website_details and the
# report-file helpers are project-local and defined elsewhere.

def main():

    # mtk: get command line arguments
    # Change this to a function
    InputFileNameElements = [ "WebsiteSearchDetails.txt" ]
    OutputFileNameElements = [ "Reports", "Report.json" ]

    # read each line in the input file and store it;
    # get_auction_website_details() returns pairs of a URL string and a list of search terms
    for WebsiteUrl, SearchItems in  get_auction_website_details(InputFileNameElements):

        print("Getting search items for " + WebsiteUrl)        
        
        # mtk - insert logic here for different website details...
        
        # class StolenAuctionSoup takes SearchItems and locates results accordingly 
        GumTreeSoup = StolenAuctionSoup()
        # build the specific search URL from each stored line in WebsiteSearchDetails.txt
        MyUrl = GumTreeSoup.create_url_request(WebsiteUrl, SearchItems)
        # send the URL request to the website and parse the response with Beautiful Soup
        MySoup = GumTreeSoup.get_soup(MyUrl)
        # mtk 07.17 - save each report to a unique folder whose name is built from the search details
        UniqueOutputFileNameElements = create_unique_report_file_name(OutputFileNameElements, SearchItems)
        # use this same information for SubHeading in HTML report
        SubHeading = return_string_with_each_search_term(SearchItems)
        
        # Create instance of HTML (Report) Writer
        HTMLReport = HTMLWriter()
        HTMLReport.create_html_file(UniqueOutputFileNameElements)
        HTMLReport.add_main_heading(WebsiteUrl)
        HTMLReport.add_sub_heading(SubHeading)
        HTMLReport.add_table_by_id(WebsiteUrl)
        
        # examine the results from GumTree.com (get_website_items is a generator: it contains yield)
        for CurrentItem in GumTreeSoup.get_website_items(MySoup, os.path.dirname(os.path.join(*UniqueOutputFileNameElements))):
            
            print ("Reporting current item..." + CurrentItem[0])
            
            # ..and add details to report,
            HTMLReport.add_table_data_by_id(WebsiteUrl, CurrentItem)
            # along with any related images
            HTMLReport.add_table_data_image_by_id(WebsiteUrl, GumTreeSoup.CurrentDownloadedImageFileName)
        
        # write the report with the specified inputs
        HTMLReport.write_html_report_and_close(2)
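
The comment above notes that get_website_items() contains yield: it is a generator, so each listing is handed back (and written to the report) as soon as it is parsed, instead of after the whole page has been processed. A minimal sketch of that pattern; the <article> tag and the one-element tuples are assumptions, not the real class's markup handling:

from bs4 import BeautifulSoup

HTML = '<article>bike</article><article>laptop</article>'  # stand-in page

def get_website_items(soup):
    # yield one result at a time so the caller can report each item
    # as soon as it is parsed, instead of building a full list first
    for listing in soup.find_all('article'):
        yield (listing.get_text(strip=True),)

soup = BeautifulSoup(HTML, 'html.parser')
for item in get_website_items(soup):
    print('Reporting current item...' + item[0])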
Example 4
import datetime
import itertools
import os
import tarfile

import pandas as pd
import urllib3

# HTMLWriter, clear_data, download_record and console_format are
# project-local helpers defined elsewhere in this module.

def main():
    extract_prefix = "AoT_Taiwan.complete"
    extract_tarpath = "{0}.{1}".format(
        extract_prefix,
        datetime.datetime.now().strftime("%Y-%m-%d"))
    clear_data(extract_tarpath)
    writer = HTMLWriter()
    urllib3.disable_warnings()
    temp_filename = "temp.tar"
    target_url = "https://www.mcs.anl.gov/research/projects/waggle/downloads/datasets/AoT_Taiwan.complete.recent.tar"

    # download today's archive only if it has not already been extracted
    if not os.path.isdir(extract_tarpath):
        download_record(target_url, temp_filename)

    # pull just the data and node tables out of the downloaded tarball
    with tarfile.open(temp_filename) as tf:
        tf.extract(extract_tarpath + "/data.csv.gz")
        tf.extract(extract_tarpath + "/nodes.csv")
    console_format("Extract Done!")
    console_format("Processing...")

    node_list = ['0CC', '110', '0FD']  # vsn codes of the nodes to keep
    # node_list = ['0FD']
    df_nodes = pd.read_csv(extract_tarpath + "/nodes.csv")
    df_nodes = df_nodes[df_nodes['vsn'].isin(node_list)]
    df_nodes = df_nodes[['node_id', 'vsn']]

    # parameters of interest for each sensor type
    sensor_param = {
        'co': ["concentration"],
        'h2s': ["concentration"],
        'no2': ["concentration"],
        'o3': ["concentration"],
        'so2': ["concentration"],
        'pms7003': ["1um_particle", "2_5um_particle", "10um_particle"],
        'bmp180': ["pressure", "temperature"],
        'hih4030': ["humidity"],
        'hih6130': ["humidity", "temperature"],
        'htu21d': ["humidity", "temperature"],
        'lps25h': ["temperature"],
        'pr103j2': ["temperature"],
        'tsys01': ["temperature"],
        'tmp421': ["temperature"],
        'tmp112': ["temperature"],
        'mma8452q': ["acceleration_x", "acceleration_y", "acceleration_z"],
        'bmi160': [
            "acceleration_x", "acceleration_y", "acceleration_z",
            "orientation_x", "orientation_y", "orientation_z"
        ],
        'hmc5883l':
        ["magnetic_field_x", "magnetic_field_y", "magnetic_field_z"],
        'tsl260rd': ["intensity"],
        'ml8511': ["intensity"],
        'mlx75305': ["intensity"],
        'si1145': [
            "intensity", "ir_intensity", "uv_intensity",
            "visible_light_intensity"
        ],
        'tsl250rd': ["intensity"],
        'apds_9006_020': ["intensity"],
        'spv1840lr5h_b': ["intensity"],
        'image_detector': ["car_total", "person_total"]
    }

    # flatten all parameter lists and de-duplicate them, preserving order
    flatten_param = list(dict.fromkeys(
        itertools.chain(*sensor_param.values())))
    df_data = pd.read_csv(extract_tarpath + "/data.csv.gz", compression='gzip')
    df_data = df_data[df_data['sensor'].isin(list(sensor_param.keys()))]
    # df_data = df_data[df_data['parameter'].isin(flatten_param)].reset_index()
    # df_data = df_data.drop(columns='index')
    # drop readings whose parameter is not tracked for their sensor
    drop_rows = list()
    for index, row in df_data.iterrows():
        if not row['parameter'] in sensor_param[row['sensor']]:
            drop_rows.append(index)
    df_data = df_data.drop(index=drop_rows)

    # attach the vsn labels to the readings
    df_data = pd.merge(df_nodes, df_data, on='node_id')
    df_data['timestamp'] = pd.to_datetime(df_data['timestamp'])
    # keep only the last (most recent) reading per group
    df_data = df_data.groupby(
        ['node_id', 'vsn', 'subsystem', 'sensor',
         'parameter']).last().reset_index()
    df_data['value_raw'] = df_data['value_raw'].astype('float64')
    df_data['value_hrf'] = df_data['value_hrf'].astype('float64')

    # move the vsn column to the front
    cols = df_data.columns.tolist()
    cols.insert(0, cols.pop(cols.index('vsn')))
    df_data = df_data.reindex(columns=cols)

    output_filename = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    df_data.to_excel(output_filename + ".xlsx")
    writer.write(df_data, output_filename + ".html")
    # clear_temp()
    console_format("Done!")