def __init__(self, sub_group='M03A'):
        """Prepare storage locations and archive-name patterns for one sub-group.

        Args:
            sub_group: Dataset code inserted into every path, table name, URL
                and regular expression built here (default ``'M03A'``).

        Side effects:
            Opens the host database, creates the sub-warehouse directory when
            it does not exist, and finishes by calling
            ``self.check_archive_list()``.
        """
        import Lily.ctao.hostmetadata as chmd
        import Lily.ctao.database as cdb

        self.sub_group = sub_group
        self.hostmetadata = chmd.hostmetadata()
        self.database = cdb.database(self.hostmetadata.database)

        warehouse = self.hostmetadata.warehouse
        self.sub_warehouse = '{0}/crawler_ETC_{1}'.format(warehouse, self.sub_group)
        # NOTE(review): "clawler" looks like a typo for "crawler", but it is
        # part of the on-disk file name, so it is deliberately left unchanged.
        self.excel_filename = '{0}/data_clawler_ETC_{1}_list.xlsx'.format(warehouse, self.sub_group)
        self.sqlite_tablename = 'data_crawler_ETC_{0}_list'.format(self.sub_group)
        self.sqlite_tablepull = 'data_crawler_ETC_{0}_pull'.format(self.sub_group)

        # Create the sub-warehouse directory if nothing exists at that path.
        # makedirs also creates missing parents, and exist_ok=True tolerates
        # another process creating the directory between the check and the call.
        if not os.path.exists(self.sub_warehouse):
            os.makedirs(self.sub_warehouse, exist_ok=True)

        # Date regular expression, YYYYMMDD. Raw strings keep the backslashes
        # literal without relying on Python's invalid-escape fallback.
        date_YYYYMMDD_pattern = r'([12]\d{3}(0[1-9]|1[0-2])(0[1-9]|[12]\d|3[01]))'

        self.url = 'http://tisvcloud.freeway.gov.tw/history/TDCS/{0}/'.format(self.sub_group)
        # Archive file name as it appears inside an href attribute of the
        # remote listing page, and the bare file-name form.
        self.cloud_archive_pattern = r'href="({0}_{1}\.tar\.gz)"'.format(self.sub_group, date_YYYYMMDD_pattern)
        self.local_archive_pattern = r'({0}_{1}\.tar\.gz)'.format(self.sub_group, date_YYYYMMDD_pattern)
        self.check_archive_list()
def check_docx(docx_file_name):
    """Export every table of a .docx file to the host database and to Excel.

    Each table in the document is turned into a ``pandas.DataFrame`` of cell
    texts, written to the host database (replacing a table of the same name),
    and added as a sheet of ``<path>/<name>.xlsx``, where ``path`` and
    ``name`` come from ``get_feature(docx_file_name)``.

    Args:
        docx_file_name: Path of the .docx file to read.

    Returns:
        list[str]: The table/sheet names that were written, in document order.
    """
    from Lily.ctao.database     import database
    from Lily.ctao.hostmetadata import hostmetadata
    from Lily.blacksmith.file_feature import get_feature

    host    = hostmetadata()
    db      = database(host.database)
    doc     = Document(docx_file_name)
    feature = get_feature(docx_file_name)

    excelfile = feature['path'] + '/' + feature['name'] + '.xlsx'
    tablename = feature['name'] + '_{0}'

    sheetlist = []
    # The context manager saves and closes the workbook even if an export
    # fails part-way; ExcelWriter.save() no longer exists in pandas >= 2.0.
    with pandas.ExcelWriter(excelfile, engine='xlsxwriter') as writer:
        # NOTE(review): numbering starts at 002 because the original code
        # incremented its counter (initialised to 1) before first use; kept
        # so that existing table and sheet names do not change.
        for counter, tab in enumerate(doc.tables, start=2):
            rows = [[cell.text for cell in row.cells] for row in tab.rows]
            df = pandas.DataFrame(rows)

            table_name = tablename.format(str(counter).zfill(3))
            sheetlist.append(table_name)
            df.to_sql(table_name, db.connect, if_exists='replace')
            df.to_excel(writer, sheet_name=table_name)

    return sheetlist
def check_module():
    """On a Windows host, open the 'check_module' dialog and print its values.

    Does nothing when the host platform string does not begin with
    ``'Windows'``.
    """
    import Lily.ctao.hostmetadata as chmd

    host = chmd.hostmetadata()
    if host.platform[:7] != 'Windows':
        return

    dialog = tkui('check_module')
    # Print every key of the dialog's values together with its entry.
    for key in dialog.values:
        print(key, dialog.values[key])
Esempio n. 4
0
    def __init__(self):
        """Open this month's crawler database and build the data-source table.

        The SQLite file lives in the host warehouse and carries the current
        year and month in its name. ``self.dict_data`` maps a source key to
        ``[url, exchange-time regex, exchange-time strptime format]`` and
        ``self.list_df`` is the same information as a DataFrame indexed by
        source key, extended with three columns filled with placeholder
        values (one random byte and the current time).
        """
        self.ctaohost = hmd.hostmetadata()

        month_stamp = datetime.datetime.today().strftime('%Y%m')
        sqlite_name = '/ctao_data_crawler_vehicledetect_{0}.sqlite'.format(month_stamp)
        self.database_filename = self.ctaohost.warehouse + sqlite_name
        self.database = cdb.database(self.database_filename)

        self.sub_group = 'data_crawler_vd'

        # Patterns and timestamp layouts shared by several sources.
        tpec_stamp = '%Y/%m/%dT%H:%M:%S'
        slash_stamp = '%Y/%m/%d %H:%M:%S'
        updatetime_re = 'updatetime="([^"]*)"'

        sources = {}
        sources['tpec_vddata'] = [
            'https://tcgbusfs.blob.core.windows.net/blobtisv/GetVDDATA.xml.gz',
            '<ExchangeTime>(.*)</ExchangeTime>',
            tpec_stamp,
        ]
        sources['tpec_vd'] = [
            'https://tcgbusfs.blob.core.windows.net/blobtisv/GetVD.xml.gz',
            '<vd:ExchangeTime>(.*)</vd:ExchangeTime>',
            tpec_stamp,
        ]
        sources['nfbx_1968'] = [
            'http://tisvcloud.freeway.gov.tw/xml/1min_incident_data_1968.xml',
            'time="([^"]*)"',
            '%Y-%m-%d %H:%M:%S',
        ]
        sources['nfbx_rlx1'] = [
            'http://tisvcloud.freeway.gov.tw/roadlevel_value.xml.gz',
            updatetime_re,
            slash_stamp,
        ]
        sources['nfbx_rlx5'] = [
            'http://tisvcloud.freeway.gov.tw/roadlevel_value5.xml.gz',
            updatetime_re,
            slash_stamp,
        ]
        sources['nfbx_vdx1'] = [
            'http://tisvcloud.freeway.gov.tw/vd_value.xml.gz',
            updatetime_re,
            slash_stamp,
        ]
        sources['nfbx_vdx5'] = [
            'http://tisvcloud.freeway.gov.tw/vd_value5.xml.gz',
            updatetime_re,
            slash_stamp,
        ]
        self.dict_data = sources

        # One row per open-data source, indexed by its key.
        column_names = [
            'url',
            'exchange_time_repattern',
            'exchange_time_datetimepattern',
        ]
        self.list_df = pandas.DataFrame.from_dict(
            self.dict_data, orient='index', columns=column_names)

        # Placeholder values broadcast to every row.
        self.list_df['gzip_context'] = numpy.random.bytes(1)
        self.list_df['download_datetime'] = numpy.datetime64(datetime.datetime.now())
        self.list_df['exchange_datetime'] = numpy.datetime64(datetime.datetime.now())
Esempio n. 5
0
def check_module():
    """Print host metadata and database tables as a quick self-check.

    On a host whose platform string begins with ``'Windows'`` it additionally
    creates an ``asktablename`` object and prints the tables of its ``mydb``.
    """
    import Lily.ctao.hostmetadata as chmd

    host = chmd.hostmetadata()
    print('check moudel Lily.ctao.hostmetadata')

    identity = (host.callname, host.hostname, host.platform)
    print(*identity)
    storage = (host.database, host.warehouse, host.factory)
    print(*storage)

    host_db = database(host.database)
    print(host_db.tables())
    print('No news is good news')

    # The table-name dialog is only exercised on Windows hosts.
    if host.platform[:7] != 'Windows':
        return

    dialog = asktablename()
    print(dialog.mydb.tables())
Esempio n. 6
0
    def __init__(self, database_path):
        """Connect to a SQLite file and load the SpatiaLite extension.

        Args:
            database_path: Path handed to ``sqlite3.connect``.

        The extension library name depends on the first six characters of the
        host platform string: ``'libspatialite'`` when they equal ``'Linux-'``,
        ``'mod_spatialite'`` otherwise.
        """
        self.database_path = database_path
        self.connect = sqlite3.connect(database_path)
        self.connect.enable_load_extension(True)

        from Lily.ctao.hostmetadata import hostmetadata
        self.platform = hostmetadata().platform[:6]

        extension_name = 'libspatialite' if self.platform == 'Linux-' else 'mod_spatialite'
        self.connect.load_extension(extension_name)

        self.cursor = self.connect.cursor()
        self.alias_count = 0
Esempio n. 7
0
def to_database(target_dir):
    """Store the file-feature/md5 listing of a directory in the host database.

    The destination table name is built from the ``alnum``-processed host
    platform, host name and ``target_dir``; an existing table of that name is
    replaced.

    Args:
        target_dir: Directory passed to ``get_all_filefeature_with_md5sum``.
    """
    import Lily.ctao.database as cdb
    import Lily.ctao.nsgstring as nstr
    import Lily.ctao.hostmetadata as chmd

    host = chmd.hostmetadata()
    name_parts = [
        nstr.alnum(raw)
        for raw in (host.platform, host.hostname, target_dir)
    ]

    db = cdb.database(host.database)
    features = get_all_filefeature_with_md5sum(target_dir)

    table_name = 'data_rdset_filemd5_{0}_{1}_hhot_{2}'.format(*name_parts)
    features.to_sql(table_name, db.connect, if_exists='replace', index=False)
Esempio n. 8
0
def check_moudle():
    """Pick a target directory and run ``to_database`` on it through the pool.

    The directory comes from, in order of preference:

    * a directory-selection dialog when the host platform string begins with
      ``'Windows'``;
    * the single command-line argument, when exactly one was given;
    * an interactive prompt otherwise.
    """
    import sys
    import Lily.ctao.hostmetadata as chmd
    from Lily.blacksmith.mppool import mppool
    pool = mppool()

    this_host = chmd.hostmetadata()

    if this_host.platform[:7] == 'Windows':
        from Lily.ctao.userargument import tkui
        ui = tkui('select_target_directory', [['target', 'sel', 'directory']])
        target = ui.values['target']
    elif len(sys.argv) == 2:
        # The original compared the list itself with 2 (``sys.argv == 2``),
        # which is never true, so this branch could not be reached.
        target = sys.argv[1]
    else:
        target = input("Enter a directory name:(path)")

    pool.run(to_database, target, 'get_all_file_feature')
Esempio n. 9
0
def check_time():
    """Derive begin/end timestamps for the rows of table ``hln_0206_3``.

    The table is loaded from the host database and its first row is dropped
    (presumably a header row; confirm against the data). For each remaining
    row, the second column is searched for ``M/D`` dates and the third column
    for ``HH:MM`` times. The first two matches of each become the begin and
    end values; a single match is used for both, and no match falls back to
    ``01/01`` and ``00:00``. The year is fixed to 2018.

    Returns:
        pandas.DataFrame: The loaded rows with ``beg`` and ``end`` columns
        holding ``datetime.datetime`` values. Earlier versions returned
        nothing and discarded the computed values.

    Raises:
        ValueError: If a matched date/time pair is not a valid timestamp
            (for example an hour above 23).
    """
    import datetime
    import re
    import Lily.ctao.database as cdb
    import Lily.ctao.hostmetadata as chmd

    host = chmd.hostmetadata()
    db = cdb.database(host.database)

    # Compiled once, outside the row loop.
    date_pattern = re.compile(r'(0?[1-9]|1[0-2])/(0[1-9]|[12][0-9]|3[01])')
    time_pattern = re.compile(r'([0-2][0-9]):([0-5][0-9])')

    df = db.to_dataframe('hln_0206_3')
    # Copy so that the new columns are added to an independent frame rather
    # than to a slice of the loaded one.
    df = df.iloc[1:].copy()

    begins = []
    ends = []
    for _, row in df.iterrows():
        # Positional access to the second and third columns.
        days = date_pattern.findall(row.iloc[1])
        times = time_pattern.findall(row.iloc[2])

        if not days:
            days = [('01', '01')]
        if len(days) == 1:
            days = [days[0], days[0]]

        if not times:
            times = [('00', '00')]
        if len(times) == 1:
            times = [times[0], times[0]]

        stamp_beg = '2018-{0}-{1} {2}:{3}'.format(*days[0], *times[0])
        stamp_end = '2018-{0}-{1} {2}:{3}'.format(*days[1], *times[1])

        # The format must mirror the string built above; the previous
        # '%Y%m%d %H%M' never matched it and always raised ValueError.
        begins.append(datetime.datetime.strptime(stamp_beg, '%Y-%m-%d %H:%M'))
        ends.append(datetime.datetime.strptime(stamp_end, '%Y-%m-%d %H:%M'))

    # Whole-column assignment: the previous df.iloc[ind]['beg'] = ... wrote to
    # a temporary row and used an index label as a position.
    df['beg'] = begins
    df['end'] = ends
    return df
Esempio n. 10
0
        for st in df.at[ind, 'Stations'].split(';'):
            if u'''地區最大震度''' not in st and st != '':
                rdset = [
                    df.at[ind, 'id'], df.at[ind, 'time'],
                    float(df.at[ind, 'px'][4:-2]),
                    float(df.at[ind, 'py'][4:-2]),
                    float(df.at[ind, 'depth'][:-3]),
                    float(df.at[ind, 'ML']), df.at[ind, 'Location'],
                    ''.join(st.split('\u3000')[:-1]),
                    float(st.split('\u3000')[-1:][0])
                ]
                station.append(rdset)
    df2 = pandas.DataFrame(
        station, columns=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'])
    df2.to_sql(
        'data_rdset_pylily_cwb_sensible_earthquake_LocalSeismicIntensity',
        db.connect,
        if_exists='replace',
        index=False)
    return


# Script entry point: also runs under an interpreter that names the module
# '__console__'.
if __name__ in ('__console__', '__main__'):
    import os

    # Work from the host warehouse directory, then run the three steps in
    # their original order.
    thost = chmd.hostmetadata()
    os.chdir(thost.warehouse)

    cwb_crawler()
    cwb_melt1()
    cwb_melt2()
Esempio n. 11
0
    def __init__(self):
        import Lily.ctao.database as cdb
        import Lily.ctao.hostmetadata as chmd

        self.this_host = chmd.hostmetadata()
        self.log_database = cdb.database(self.this_host.database)