예제 #1
0
    def test_01_create_file(self):
        """Round-trip a small data file: save rows, reload, check size, drop."""
        # NOTE(review): save() writes fname2 but load()/drop() use fname —
        # looks inconsistent; confirm against the module-level fixtures.
        table_two = cl.DataTable(fname2, 'file')
        table_two.save(fname2, ['test data', 'another line', 'final line'])

        table_three = cl.DataTable(fname3, 'file')
        contents = table_two.load(fname)
        self.assertEqual(len(contents), 157)
        table_three.drop(fname)
예제 #2
0
def main():
    """
    This is an example of project documentation using AIKIF.

    It documents the project itself — requirements, progress and
    issues — and builds an RST report in the current directory.
    """
    print('Initialising AIKIF Project...')
    name = 'AIKIF'
    project_type = 'Software'  # renamed: `type` shadowed the builtin
    desc = """
     Artificial Intelligence Knowledge Information Framework - Project Overview
       
     This document was autogenerated via aikif/examples/AIKIF_project.py  
"""
    desc += '\n     Last updated ' + mod_dt.TodayAsString()

    fldr = os.getcwd()  # e.g. 'T:\\user\\dev\\src\\python\\AIKIF\\aikif\\examples\\AIKIF_project'
    report_file_base = fldr + os.sep + 'aikif_report'
    p = mod_prj.Project(name, project_type, desc, fldr)
    project_setup(p)

    # requirements table: id, parent dep_id, short name, details
    requirements = mod_dt.DataTable('requirements.csv', ',', col_names=['id', 'dep_id', 'name', 'details'])
    p.log_table(requirements)
    p.record(requirements, '', ['a', '', 'process data', 'automatically process source files to tables based on rules'])
    p.record(requirements, '', ['b', '', 'define structures', 'use mappings and ontology to specify what to do'])
    p.record(requirements, '', ['c', '', 'log intent', 'log key events'])
    p.record(requirements, '', ['d', '', 'methods toolbox', 'implement a set of programs that can be used generically'])
    p.record(requirements, '', ['e', '', 'Command Line Interface', 'CLI to query and update datasets and control processes'])
    p.record(requirements, '', ['a01', 'a', 'download CSV', 'download CSV file from website'])
    p.record(requirements, '', ['a02', 'a', 'load CSV to table', 'import CSV file to Database table'])
    p.record(requirements, '', ['a03', 'a', 'find blank rows in CSV', 'read CSV file and count DQ issues'])
    p.record(requirements, '', ['a04', 'a', 'aggregate table', 'summarise Database table by col(n)'])
    p.record(requirements, '', ['e01', 'e', 'CLI commands', 'CLI to manage commands and modes of operation'])
    p.record(requirements, '', ['e02', 'e', 'CLI query', 'query functions of datasets in AIKIF using basic fixed commands'])
    p.record(requirements, '', ['e03', 'e', 'CLI NLP', 'integrate NLP to allow english questions and data addition in add/query mode'])
    p.record(requirements, '', ['e04', 'e', 'CLI add', 'allows user to add data to general or specific datasets'])
    p.record(requirements, '', ['e05', 'e', 'CLI process', 'allows managing of all processes in AIKIF'])

    # progress table: program name, percent complete, details
    progress = mod_dt.DataTable('progress.csv', ',', col_names=['program', 'percent', 'details'])
    p.log_table(progress)
    p.record(progress, '', ['knowledge', '1%',  'class to manage raw data to information'])
    p.record(progress, '', ['mapper', '20%', 'mapping columns to data structures, with business rules'])
    p.record(progress, '', ['sql_code_generator', '90%', 'generates SQL to transform data external to AIKIF'])

    # issues / task status table
    issues = mod_dt.DataTable('issues.csv', ',', col_names=['id', 'name', 'details'])
    p.log_table(issues)
    p.record(issues, '', ['01', 'In Progress', 'implement AIKIF project logging'])
    p.record(issues, '', ['02', 'todo', 'implement Knowledge mapping'])
    p.record(issues, '', ['03', 'Testing', 'data mapping of columns'])

    #  p.build_report(report_file_base + '.md', type='md')
    p.build_report(report_file_base + '.rst', type='rst')
    print('Done...')
예제 #3
0
 def test_06_create_blank_data_structure(self):
     """A DataTable built with col_names exposes them in order via col_names."""
     table = cl.DataTable('sales.csv',
                          ',',
                          col_names=['date', 'amount', 'details'])
     for idx, expected in enumerate(['date', 'amount', 'details']):
         self.assertEqual(table.col_names[idx], expected)
예제 #4
0
 def test_05_update_where1(self):
     """RNK_ columns get the 25th percentile of tot1/tot2 per (TERM, ID) group."""
     table = cl.DataTable(fname, ',')
     table.load_to_array()
     key_values = table.get_distinct_values_from_cols(['TERM', 'ID'])
     table.add_cols(['RNK_tot1', 'RNK_tot2'])
     for col in ['tot1', 'tot2']:
         for key in key_values:
             key_strs = [str(key[0]), str(key[1])]
             pct25, pct75, median = table.calc_percentiles(
                 col, ['TERM', 'ID'], key_strs)
             table.update_where('RNK_' + col, pct25, ['TERM', 'ID'], key_strs)
     table.save_csv(fname3)
     # Expected result:
     # ===========================================================
     # TERM    GENDER  ID      tot1    tot2    RNK_tot1RNK_tot2
     # 5300    F       00078   18      66      18      66
     # 7310    M       00078   10      12      14.0    7.0
     # 7310    M       00078   18      465     14.0    7.0
     # 7310    F       00078   30      2       14.0    7.0
     # 7310    F       00016   25      12      35.5    227.25
     # 5300    M       00016   31      0       31      0
     # 7310    F       00016   67      873     35.5    227.25
     self.assertEqual(
         table.get_header(),
         ['TERM', 'GENDER', 'ID', 'tot1', 'tot2', 'RNK_tot1', 'RNK_tot2'])
     self.assertEqual(table.arr[0][3], '18')
     self.assertEqual(table.arr[0][4], '66')
     self.assertEqual(table.arr[1][5], 14.0)
     self.assertEqual(table.arr[1][6], 7.0)
     self.assertEqual(table.arr[6][6], 227.25)
예제 #5
0
 def create_map_from_file(self, data_filename):
     """
     Read `data_filename` into a DataTable and generate the matching
     .rule file that defines the mapping for that data file.

     For all datafiles mapped, there exists a .rule file to define it.

     Args:
         data_filename: path to the comma-separated source data file.
     """
     op_filename = data_filename + '.rule'

     dataset = mod_datatable.DataTable(data_filename, ',')
     dataset.load_to_array()
     l_map = self.generate_map_from_dataset(dataset)
     with open(op_filename, 'w') as f:
         f.write('# rules file autogenerated by mapper.py v0.1\n')
         f.write('filename:source=' + data_filename + '\n')
         f.write('filename:rule=' + op_filename + '\n\n')
         for row in l_map:
             # idiomatic type check (was `type(row) is str`)
             if isinstance(row, str):
                 f.write(row + '\n')
             else:
                 # non-string rows: write each value with no separator/newline
                 for v in row:
                     f.write(v)
예제 #6
0
def create_sample_projects():
    """Build three demo projects (software, sales log, diary) and return
    them wrapped in a Projects collection."""
    software = project.Project(name='Acute Software', desc='Custom Software development', fldr='')
    software.add_detail('website', 'http://www.acutesoftware.com.au')
    software.add_detail('email', '*****@*****.**')

    sales = project.Project(name='Sales Log',  desc='Record list of sales', fldr='')
    sales.add_detail('Note', 'List of sales taken from manual entries in test program')

    expenses = cls_datatable.DataTable('expenses.csv', ',', col_names=['date', 'amount', 'details'])
    for row in (['2015-02-13', 49.94, 'restaurant'],
                ['2015-02-15', 29.00, 'petrol'],
                ['2015-02-17', 89.95, 'fringe tickets']):
        sales.record(expenses, 'Expense', row)

    diary = project.Project(name='Diary', fldr=root_folder, desc='Diary database for PIM application')
    for source_name in ('Calendar', 'Bookmarks', 'File Usage',
                        'PC Usage', 'TODO List'):
        diary.add_source(source_name, root_folder)

    all_projects = project.Projects()
    all_projects.add_project(diary)
    all_projects.add_project(software)
    all_projects.add_project(sales)

    return all_projects
예제 #7
0
 def test_03_get_distinct_values_from_cols3(self):
     """Distinct (TERM, ID) pairs are exactly the 4 known combinations."""
     table = cl.DataTable(fname, ',')
     table.load_to_array()
     pairs = table.get_distinct_values_from_cols(['TERM', 'ID'])
     expected = [('5300', '00016'), ('5300', '00078'),
                 ('7310', '00016'), ('7310', '00078')]
     self.assertEqual(sorted(pairs), sorted(expected))
예제 #8
0
 def test_04_add_cols(self):
     """add_cols() appends the new column names to the header."""
     table = cl.DataTable(fname, ',')
     table.load_to_array()
     table.add_cols(['NEW1', 'NEW2'])
     expected_header = ['TERM', 'GENDER', 'ID', 'tot1', 'tot2', 'NEW1', 'NEW2']
     self.assertEqual(table.get_header(), expected_header)
예제 #9
0
 def test_07_add_data(self):
     """Rows added via add() are stored and unique values tallied per column."""
     sales = cl.DataTable('sales.csv',
                          ',',
                          col_names=['date', 'amount', 'details'])
     rows = [['2015-01-09', 24.95, 'Timer'],
             ['2015-02-17', 45.00, 'Diary'],
             ['2015-02-19', 24.95, 'Timer']]
     for row in rows:
         sales.add(row)
     self.assertEqual(len(sales.arr), 3)
     self.assertEqual(len(sales.count_unique_values(0, 'date')), 3)
     self.assertEqual(len(sales.count_unique_values(2, 'details')), 2)
예제 #10
0
 def test_02_percentile(self):
     """percentile() interpolates linearly between the closest ranks."""
     table = cl.DataTable('', '"')
     cases = [
         ([1, 2, 3, 4, 5, 6], .25, 2.25),
         ([1, 2, 3, 4, 5, 6], .5, 3.5),
         ([1, 2, 3, 4, 5, 6], .75, 4.75),
         ([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], .25, 3.25),
         ([1, 1, 2], .5, 1),
         ([1, 1, 2], .25, 1),
         ([1, 1, 2], .75, 1.5),
     ]
     for values, pct, expected in cases:
         self.assertEqual(table.percentile(values, pct), expected)
예제 #11
0
def main():
    """
    Example of recording journal entries three ways: directly via
    DataTable, via CoreDataWhen objects, and via an Events collection.
    """
    fname = 'journal.csv'
    p = mod_prj.Project('Journal Record')
    # NOTE(review): 'nme' looks like a typo for 'name' — confirm the
    # Project attribute name before changing.
    print(p.nme)

    # Attempt #1 - using DataTable directly (TOK)
    journal = mod_dat.DataTable(fname,
                                ',',
                                col_names=['date', 'category', 'details'])
    journal.add(['2017-12-07', 'Software', 'test online version'])
    journal.add(['2017-06-11', 'Software', 'update readme'])
    journal.add(['2015-05-11', 'Shopping', 'bought jeans'])
    print(journal)
    # Sample output:
    # date	category	details
    # 11/05/2015	Software	creating LP_ADD_DATA.py to record journal to diary
    # 11/05/2015	Software	update readme
    # 11/05/2015	Shopping	bought jeans
    journal.save_csv(fname)

    # attempt #2 using Core DATA  (TOK)
    meeting = mod_core.CoreDataWhen('Sales Meeting', [
        '2015-04-11', 'Office', 'Meet with client to discuss custom software'
    ])
    print(meeting.format_csv())

    # attempt #3 use an Events class to manage it all
    events = Events(os.getcwd(), 'D', 'DAT')
    events.add(mod_core.CoreDataWhen(
        'Sales Meeting',
        ['2014-01-11', 'Office', 'Catchup with client']))
    events.add(mod_core.CoreDataWhen(
        'Sales Meeting#3',
        ['2015-03-11', 'Office', 'Catchup with client']))
    events.add(mod_core.CoreDataWhen(
        'DEV AIKIF - core data',
        ['2015-05-11', 'Software', 'update TEST - no test for CORE_DATA']))
    events.add(mod_core.CoreDataWhen(
        'DEV LifePim - core data',
        ['2015-03-11', 'Software', 'use data for LifePim']))
    events.add(mod_core.CoreDataWhen(
        'DEV AIKIF - data tools',
        ['2015-05-11', 'Software', 'fix data tools ']))
    print(events)

    events.save()

    search_term = 'Catchup'  # 'data'
    print('\n Searching for ', search_term)
    for match in events.find(search_term):
        print(match)  # match.data[2]
예제 #12
0
 def _create_from_csv(self):
     """
     Populate self.content['data'] from the CSV file named in
     self.input_data, then record the load in the process log.
     """
     import aikif.dataTools.cls_datatable as cl

     table = cl.DataTable(self.input_data, ',')
     table.load_to_array()
     self.content['data'] = table.arr

     lg.record_process('_create_from_csv', 'read ' + self._calc_size_stats() + ' from ' + self.input_data)
예제 #13
0
def make_table(f_all, tbl, cols):
    """
    Generate the CREATE + index SQL for table `tbl` with columns `cols`,
    save it as CREATE_<tbl>.SQL and append an @-include line to `f_all`.

    Args:
        f_all: open file handle for the master run script.
        tbl:   table name.
        cols:  list of column names.
    """
    tname = 'CREATE_' + tbl
    # NOTE(review): this DataTable is built but never used (its save_csv
    # below is commented out) — confirm whether it can be removed.
    dt = mod_table.DataTable(tbl, fldr + tbl + '.csv', cols)
    #dt.save_csv(fldr + os.sep + tbl + '.csv')

    # create the SQL of a file — distinct name so the generator no longer
    # reuses (shadows) the DataTable variable `t`
    gen = SQLCodeGenerator(tbl)
    gen.set_column_list(cols)
    gen.create_script_fact()
    gen.create_index(tbl, cols)
    gen.save_ddl(fldr + os.sep + tname + '.SQL')
    f_all.write('@' + tname + '.SQL;\n')
예제 #14
0
def load_random_tables(d):
    """Create a large random CSV, load it, and import it into `d` under
    400 different schema names, printing DB size after each import."""
    data_dir = os.path.abspath(
        os.path.dirname(os.path.abspath(__file__)) + os.sep + '..' + os.sep +
        '..' + os.sep + 'data')
    fname = data_dir + os.sep + 'temp' + os.sep + 'TEMP_LOAD_TESTING.csv'
    create_test_file(fname)
    table = mod_dt.DataTable(fname, ',')
    table.load_to_array()
    num_loads = 400
    for load_num in range(20, 20 + num_loads):
        schema = 'tst' + str(load_num).zfill(5)
        d.import_datatable(table, schema, 0)
        print('dbsize=', d.connection.dbsize(), ' Total memory=',
              d.connection.info()['used_memory_human'])
예제 #15
0
def create_test_file(fname):
    """
    Create a very large random test file (100,000 rows x 9 columns)
    and save it as CSV to `fname`.

    Args:
        fname: output CSV path.
    """
    # locals renamed to snake_case per PEP 8 (were colLabel / colTypes)
    col_labels = [
        'id2', 'DATE', 'name', 'surname', 'Born', 'Location', 'Quote', 'Score',
        'Points'
    ]
    col_types = [
        'STRING', 'DATE', 'PEOPLE', 'PEOPLE', 'PLACE', 'PLACE', 'WORD', 'INT',
        'INT'
    ]

    test_datatable = mod_dt.DataTable(fname, ',')
    test_datatable.arr = mod_gen.random_table(9, 100000, col_types, col_labels)
    test_datatable.header = col_labels
    test_datatable.save_csv(fname, False)
예제 #16
0
    def test_50_extract_csv_to_fact(self):
        """
        Read a CSV file of world locations, parse each row into a
        CoreDataWhy object, and verify the combined CSV output.
        """
        import aikif.dataTools.cls_datatable as cl

        src = os.path.join(pth, 'data', 'core', 'LOCATION_WORLD.csv')
        locations = cl.DataTable(src, ',')
        locations.load_to_array()

        csv_res = ''
        for row in locations.arr:
            fact = mod_core.CoreDataWhy(row[1], [{'code': row[1], 'name': row[2]}])
            csv_res += fact.format_csv()
            self.assertEqual(fact.name, row[1])
        self.assertTrue('"COD","' in csv_res)
        self.assertTrue("'name': 'LABEL'" in csv_res)
        self.assertTrue("'code': 'COD'" in csv_res)
        self.assertTrue("'West Bank'" in csv_res)
        self.assertEqual(len(csv_res), 12163)
예제 #17
0
    def test_02_record(self):
        """Details and recorded rows are stored on the project, and reports
        can be built in rst/md/html formats."""
        sales_proj = project.Project(name='Sales Log',
                                     desc='Record list of sales',
                                     fldr='')
        sales_proj.add_detail(
            'Note', 'List of sales taken from manual entries in test program')
        self.assertEqual(sales_proj.details[0][0], 'Note')
        self.assertEqual(
            sales_proj.details[0][1],
            'List of sales taken from manual entries in test program')

        expenses = cls_datatable.DataTable(
            'expenses.csv', ',', col_names=['date', 'amount', 'details'])
        for row in (['2015-02-13', 49.94, 'restaurant'],
                    ['2015-02-15', 29.00, 'petrol'],
                    ['2015-02-17', 89.95, 'fringe tickets']):
            sales_proj.record(expenses, 'Expense', row)

        sales_proj.log_table(expenses)
        for report_name, fmt in (('task.rst', 'rst'),
                                 ('task.md', 'md'),
                                 ('task.html', 'html')):
            sales_proj.build_report(report_name, fmt)
        self.assertEqual(len(expenses.arr), 3)
        self.assertEqual(expenses.arr[1][2], 'petrol')
예제 #18
0
def main():
    """
    Document the Allen_AI Kaggle competition entry as an AIKIF project:
    records lookup sources, progress and results tables, registers three
    solver methods in a Toolbox, runs each against the parameter list,
    and builds an RST report.

    Relies on module-level names defined elsewhere in this file
    (op_folder, root_folder, src_data, params, mod_prj, mod_dt, mod_tool).
    """
    p = mod_prj.Project('Allen_AI',
                        tpe='Software',
                        fldr=op_folder,
                        desc='Kaggle competetion entry for Allen_AI')

    # project metadata: competition URL and local file locations
    p.add_detail('kaggle_url',
                 'https://www.kaggle.com/c/the-allen-ai-science-challenge')
    p.add_detail('files_root_folder', root_folder)
    p.add_detail('files_src_data', src_data)
    p.add_detail('files_op_folder', op_folder)
    p.add_detail('date_last_ran', mod_dt.TodayAsString())

    # lookup_src: catalogue of external knowledge bases / data sources
    lookup_src = mod_dt.DataTable('lookup_src.csv',
                                  ',',
                                  col_names=['name', 'url'])
    p.log_table(lookup_src)
    p.record(lookup_src, '', [
        'page-resources',
        'http://aclweb.org/aclwiki/index.php?title=RTE_Knowledge_Resources#Publicly_available_Resources'
    ])
    p.record(lookup_src, '', ['science_notes', 'http://www.ck12.org/'])
    p.record(lookup_src, '', [
        'data-wikipedia',
        'https://en.wikipedia.org/wiki/Wikipedia:Database_download'
    ])

    p.record(lookup_src, '', ['YAGO', 'http://yago-knowledge.org'])
    p.record(lookup_src, '', ['Dbpedia', 'http://dbpedia.org'])
    p.record(lookup_src, '', ['Freebase', 'http://freebase.com'])
    p.record(lookup_src, '',
             ['Entitycube', 'http://entitycube.research.microsoft.com'])
    p.record(lookup_src, '', ['renlifang', 'http://renlifang.msra.cn'])
    p.record(lookup_src, '', ['NELL', 'http://rtw.ml.cmu.edu'])
    p.record(lookup_src, '', ['DeepDive', 'http://deepdive.stanford.edu'])
    p.record(
        lookup_src, '',
        ['Probase', 'http://research.microsoft.com/en-us/projects/probase/'])
    p.record(lookup_src, '', ['KnowItAll', 'http://openie.cs.washington.edu'])
    p.record(lookup_src, '', ['ReVerb', 'http://reverb.cs.washington.edu'])
    p.record(lookup_src, '', ['BabelNet', 'http://babelnet.org'])
    p.record(
        lookup_src, '',
        ['WikiNet', 'http://www.h-its.org/english/research/nlp/download/'])
    p.record(lookup_src, '',
             ['ConceptNet', 'http://conceptnet5.media.mit.edu'])
    p.record(lookup_src, '', ['WordNet', 'http://wordnet.princeton.edu'])
    p.record(lookup_src, '', ['Linked Open Data', 'http://linkeddata.org'])

    # progress: completion status of each program in this project
    progress = mod_dt.DataTable('progress.csv',
                                ',',
                                col_names=['program', 'percent', 'details'])
    p.log_table(progress)
    p.record(
        progress, '',
        ['Source data download', '100%', 'download competition sample data'])
    p.record(progress, '',
             ['Allen_AI_install.py', '0%', 'downloads lookup data, unzips'])
    p.record(progress, '',
             ['Allen_AI_run.py', '2%', 'main script to run the program'])
    p.record(
        progress, '',
        ['method1.py', '5%', 'method to answer questions using heuristic #1'])
    p.record(
        progress, '',
        ['method2.py', '0%', 'method to answer questions using heuristic #2'])
    p.record(
        progress, '',
        ['method3.py', '0%', 'method to answer questions using heuristic #3'])

    # register the three candidate solver scripts in the toolbox;
    # each exposes solve(list) -> int
    t = mod_tool.Toolbox()
    t.add({
        'file': 'method1.py',
        'function': 'solve',
        'args': ['list'],
        'return': ['int']
    })
    t.add({
        'file': 'method2.py',
        'function': 'solve',
        'args': ['list'],
        'return': ['int']
    })
    t.add({
        'file': 'method3.py',
        'function': 'solve',
        'args': ['list'],
        'return': ['int']
    })

    # results: one row per (tool, param) run with its result and timestamp
    results = mod_dt.DataTable(
        'results.csv',
        ',',
        col_names=['program', 'function', 'param', 'result', 'date_ran'])
    p.log_table(results)

    # run every registered tool against every param and record the outcome
    # (`params` is presumably a module-level list — confirm against caller)
    for tool_num, tool in enumerate(t.lstTools):
        print('tool=', tool)
        for param in params:
            result = t.run(tool, ['test' + str(tool_num), param], root_folder)
            p.record(results, '', [
                tool['file'], tool['function'], param, result,
                mod_dt.TodayAsString()
            ])
    results.save_csv(os.path.join(root_folder, 'allen_AI_results.csv'))

    p.build_report('allen_AI.rst', tpe='rst')
    print('Done...')
예제 #19
0
 def test_03_get_distinct_values_from_cols2(self):
     """Distinct TERM values come back as a single set inside a list."""
     table = cl.DataTable(fname, ',')
     table.load_to_array()
     distinct = table.get_distinct_values_from_cols(['TERM'])
     self.assertEqual(sorted(distinct), [{'5300', '7310'}])
예제 #20
0
 def test_03_get_distinct_values_from_cols1(self):
     """Distinct GENDER values come back as a single set inside a list."""
     table = cl.DataTable(fname, ',')
     table.load_to_array()
     distinct = table.get_distinct_values_from_cols(['GENDER'])
     self.assertEqual(sorted(distinct), [{'F', 'M'}])
예제 #21
0
def main():
    """
    This is an example of project documentation using AIKIF.

    It documents the project itself — requirements and issues —
    and builds an RST report in the current directory.
    """
    print('Initialising AIKIF Project...')
    name = 'AIKIF'
    project_type = 'Software'  # renamed: `type` shadowed the builtin
    desc = """
     Artificial Intelligence Knowledge Information Framework - Project Overview
       
     This document was autogenerated via aikif/examples/AIKIF_project.py  
"""
    desc += '\n     Last updated ' + mod_dt.TodayAsString()

    # e.g. 'T:\\user\\dev\\src\\python\\AIKIF\\aikif\\examples\\AIKIF_project'
    fldr = os.getcwd()
    report_file_base = fldr + os.sep + 'aikif_report'
    p = mod_prj.Project(name, project_type, desc, fldr)
    project_setup(p)

    # requirements table: id, parent dep_id, short name, details
    requirements = mod_dt.DataTable(
        'requirements.csv', ',', col_names=['id', 'dep_id', 'name', 'details'])
    p.log_table(requirements)
    p.record(requirements, '', [
        'a', '', 'process data',
        'automatically process source files to tables based on rules'
    ])
    p.record(requirements, '', [
        'b', '', 'define structures',
        'use mappings and ontology to specify what to do'
    ])
    p.record(requirements, '', ['c', '', 'log intent', 'log key events'])
    p.record(requirements, '', [
        'd', '', 'methods toolbox',
        'implement a set of programs that can be used generically'
    ])
    p.record(requirements, '',
             ['a01', 'a', 'download CSV', 'download CSV file from website'])
    p.record(
        requirements, '',
        ['a02', 'a', 'load CSV to table', 'import CSV file to Database table'])
    p.record(requirements, '', [
        'a03', 'a', 'find blank rows in CSV',
        'read CSV file and count DQ issues'
    ])
    p.record(
        requirements, '',
        ['a04', 'a', 'aggregate table', 'summarise Database table by col(n)'])

    # issues / task status table
    issues = mod_dt.DataTable('issues.csv',
                              ',',
                              col_names=['id', 'name', 'details'])
    p.log_table(issues)
    p.record(issues, '', ['01', 'todo', 'implement AIKIF project logging'])

    #  p.build_report(report_file_base + '.md', type='md')
    p.build_report(report_file_base + '.rst', type='rst')
    print('Done...')