Exemple #1
0
 def test_organize_same_dir(self):
     """Organize testDirMany into itself and check each file's new location.

     Every file name listed by get_file_paths_table (captured before
     organize() runs) must exist as a regular file under
     <cwd>/testDirMany/ENEE408A/HOMEWORK1.
     """
     listing = get_file_paths_table("testDirMany")
     organize("testDirMany", "testDirMany")
     target_dir = os.path.join(
         os.getcwd(), "testDirMany", "ENEE408A", "HOMEWORK1")
     # Only the file names matter here; the keys of the table are ignored.
     for name in (n for group in listing.values() for n in group):
         organized = os.path.join(target_dir, name)
         self.assertTrue(os.path.exists(organized))
         self.assertTrue(os.path.isfile(organized))
Exemple #2
0
 def test_organize_multiple_subdir(self):
     """Organize testDirMany in place after adding ENEE408A_HOMEWORK2_ files.

     generate_files() is called with numFiles=10 first; the table from
     get_file_paths_table is captured before organize() runs. Each file in
     the table must then exist as a regular file at
     <cwd>/testDirMany/<table key>/<file name>.
     """
     generate_files("testDirMany", "ENEE408A_HOMEWORK2_", numFiles=10)
     listing = get_file_paths_table("testDirMany")
     organize("testDirMany", "testDirMany")
     root = os.path.join(os.getcwd(), "testDirMany")
     for subdir, names in listing.items():
         # Table keys are treated as paths relative to the organized root.
         subdir_root = os.path.join(root, subdir)
         for name in names:
             candidate = os.path.join(subdir_root, name)
             self.assertTrue(os.path.exists(candidate))
             self.assertTrue(os.path.isfile(candidate))
Exemple #3
0
def main():
    """Organize the raw CSV dataset, load it, and request the learning sets."""
    # Step 1: organize.organize() reworks the original CSV dataset into new
    # dataset files, laid out as sketched below:
    #
    #   dataset
    #     |-- profile.csv
    #     |-- transactions.csv
    #     |-- dependent.csv
    organize.organize()

    # Step 2: feed each organized file to a fresh DataManager.
    data_manager = DataManager()
    data_manager.read_profile(DATA_DIR + 'profile.csv')
    data_manager.read_transactions(DATA_DIR + 'transactions.csv')
    data_manager.read_dependent(DATA_DIR + 'dependent.csv')

    # Step 3: feature selection (flagged as NOT IMPLEMENTED by the author).
    data_manager.feature_engineering()

    # Step 4: training and test sets (also flagged as NOT IMPLEMENTED).
    # The four-way unpacking requires get_learning_data() to yield exactly
    # four items.
    A_train, y_train, A_test, y_test = data_manager.get_learning_data()
Exemple #4
0
 def test_excel(self):
     """Test parsing an Excel file.

     The first row produced by organize() for the fixture must equal the
     expected list of (header, value) pairs.
     """
     filename = 'excel/simple_whitespace.xlsx'
     first_line = [
         (u'Year', 1901),
         (u'Increase', 1),
         (u'Decrease', 14),
         (u'Grade', u'A'),
     ]
     with self.file_handle(filename) as fh:
         lines = organize(fh)
         # next() works on Python 2.6+ and Python 3; the .next() method
         # only exists on Python 2 iterators.
         first = list(next(lines))
         # assertEquals is a deprecated alias that Python 3.12 removed.
         self.assertEqual(first, first_line)
Exemple #5
0
 def test_tsv(self):
     """Test parsing a TSV file.

     The first row produced by organize() for the fixture must equal the
     expected list of (header, value) pairs.
     """
     filename = 'tsv/imf_disb_repay.tsv'
     first_line = [
         (u'Year', u'2014'),
         (u'GRA Disbursements', u'2,765,465,000'),
         (u'GRA Repurchases', u'5,427,052,823'),
         (u'GRA Charges Paid', u'584,358,716'),
         (u'PRGT Disbursements', u'63,266,571'),
         (u'PRGT Repayments', u'132,553,847'),
         (u'PRGT Interest Paid', u'0'),
         (u'Total Disbursements', u'2,828,731,571'),
         (u'Total Repayments', u'5,559,606,670'),
         (u'Total Charges and Interest', u'584,358,716'),
     ]
     with self.file_handle(filename) as fh:
         lines = organize(fh)
         # next() works on Python 2.6+ and Python 3; the .next() method
         # only exists on Python 2 iterators.
         first = list(next(lines))
         # assertEquals is a deprecated alias that Python 3.12 removed.
         self.assertEqual(first, first_line)
Exemple #6
0
    def test_to_dict(self):
        """Test transforming rows into dicts as described in the README.

        The first row produced by organize() is an iterable of
        (header, value) pairs, so passing it to dict() must give the
        expected mapping.
        """
        filename = 'tsv/imf_disb_repay.tsv'
        first_dict = {
            u'PRGT Interest Paid': u'0',
            u'GRA Disbursements': u'2,765,465,000',
            u'GRA Charges Paid': u'584,358,716',
            u'Total Charges and Interest': u'584,358,716',
            u'PRGT Disbursements': u'63,266,571',
            u'Total Repayments': u'5,559,606,670',
            u'Year': u'2014',
            u'PRGT Repayments': u'132,553,847',
            u'GRA Repurchases': u'5,427,052,823',
            u'Total Disbursements': u'2,828,731,571',
        }

        with self.file_handle(filename) as fh:
            lines = organize(fh)
            # next() works on Python 2.6+ and Python 3; dict() consumes the
            # pairs directly, so no intermediate list is needed.
            first = dict(next(lines))
            # assertEquals is a deprecated alias that Python 3.12 removed.
            self.assertEqual(first, first_dict)
Exemple #7
0
    def test_csv(self):
        """Test parsing a CSV file.

        The first row produced by organize() for the fixture must equal the
        expected list of (header, value) pairs.
        """
        filename = 'csv/worldbank_preamble.csv'
        first_line = [
            (u'Country', u'Belarus'), (u'Year', u'2000'),
            (u'CO2 emissions (metric tons per capita)', u'5.91'),
            (u'Electric power consumption (kWh per capita)', u'2988.71'),
            (u'Energy use (kg of oil equivalent per capita)', u'2459.67'),
            (u'Fertility rate, total (births per woman)', u'1.29'),
            (u'GNI per capita, Atlas method (current US$)', u'1.38E+03'),
            (u'Internet users (per 1,000 people)', u'18.69'),
            (u'Life expectancy at birth, total (years)', u'68.01'),
            (u'Military expenditure (% of GDP)', u'1.26'),
            (u'Population, total', u'1.00E+07'),
            (u'Prevalence of HIV, total (% of population ages 15-49)', u''),
        ]

        with self.file_handle(filename) as fh:
            lines = organize(fh)
            # next() works on Python 2.6+ and Python 3; the .next() method
            # only exists on Python 2 iterators.
            first = list(next(lines))
            # assertEquals is a deprecated alias that Python 3.12 removed.
            self.assertEqual(first, first_line)
Exemple #8
0
def gather():
    """Collect articles for every source in ``srcs`` and save them to now.txt.

    For each source, the ``https://newsapi.org/v2/everything`` endpoint is
    queried with the free names ``headers`` and ``data`` (``data['sources']``
    is overwritten with the current source). The text of each returned
    article is fetched through the callable named
    ``<TitleCasedSourceWithoutDashes>_text_getter``; articles whose text is
    falsy are dropped. The collected dicts are passed to
    ``organize.organize`` and the result is written as JSON to ``now.txt``.

    Raises:
        NameError: if an article is returned for a source that has no
            matching ``*_text_getter`` defined in this module.
    """
    allarticles = []
    for s in tqdm(srcs):
        # NOTE: this mutates the shared request-parameter dict in place.
        data['sources'] = s
        r = requests.get("https://newsapi.org/v2/everything",
                         headers=headers,
                         params=data)
        # The getter name depends only on the source, so build it once here
        # rather than once per article.
        getter_name = s.title().replace("-", "") + "_text_getter"
        for art in tqdm(r.json()['articles']):
            # Look the getter up by name in the module namespace instead of
            # eval()-ing a constructed string; a miss still surfaces as
            # NameError, as it did with eval().
            try:
                getter = globals()[getter_name]
            except KeyError:
                raise NameError("name %r is not defined" % getter_name)
            art_text = getter(art['url'])
            if art_text:
                # just make a new dict
                d = {
                    "title": art['title'],
                    "blurb": art['description'],
                    "source": art['source']['name'],
                    "pubdate": art['publishedAt'],
                    "link": art['url'],
                    "text": art_text
                }
                allarticles.append(d)
    organized = organize.organize(allarticles)  # orig. organize.organize()
    with open('now.txt', 'w') as f:
        f.write(json.dumps(organized))
Exemple #9
0
 def test_organize_diff_loc(self):
     """Organize testDir into newTestDir and check one expected output file.

     ENEE408A_HOMEWORK1_0.txt must exist as a regular file under
     <cwd>/newTestDir/ENEE408A/HOMEWORK1.
     """
     organize("testDir", "newTestDir")
     organized_file = os.path.join(
         os.getcwd(), "newTestDir", "ENEE408A", "HOMEWORK1",
         "ENEE408A_HOMEWORK1_0.txt")
     self.assertTrue(os.path.isfile(organized_file))
        if isfile(filepath):
            _, ext = splitext(filepath)
            if ext in exts:
                yield filepath


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Transform data files in directory into CSV.')
    parser.add_argument('src_dir', metavar='src_dir', help='path to transform')
    parser.add_argument('dest_dir', metavar='dest_dir', help='path to transform')
    args = parser.parse_args()

    for filepath in list_filepaths(args.src_dir):
        _, filename = split(filepath)
        with open(filepath, 'r') as fin:
            rows = organize(fin, filename=filename)
            dest = join(args.dest_dir, filename)
            print "Writing %s to %s" % (filepath, dest)
            with open(dest, 'w') as fout:
                writer = csv.writer(fout)
                first = list(islice(rows, 1))
                if len(first):
                    first = list(first[0])

                    # write header row based on first row
                    writer.writerow([x[0] for x in first])
                    # write value row based on first row
                    writer.writerow([x[1] for x in first])

                    # for remaining rows just write the data
                    for row in rows:
Exemple #11
0
from organize import organize
import pandas as pd

# Placeholder path; point this at the data file to load.
filename = 'your_file.csv'
with open(filename, 'r') as fin:
    # organize() receives the open file plus its name, and whatever it
    # returns is handed to DataFrame.from_records.
    df = pd.DataFrame.from_records(organize(fin, filename=filename))
    # The function-call form prints the same thing on Python 2 and, unlike
    # the bare `print df` statement, is valid on Python 3.
    print(df)
Exemple #12
0
# Interactive entry point: greet the user, ask for a directory and a task
# number, then hand both to organize.organize().
print('Welcome to File Organizer v0.0.1')
print('Loading...')
print()

# Imported after the banner so the greeting is printed before the project
# modules are loaded.
import inputs
import organize

# Prompt typo fixed ("Direcctory" -> "Directory").
path = input('Enter PATH of Directory which you want to Organize: ')
files = organize.basic_work(path)
inputs.menu()

# input() returns a string; the choice is passed on unconverted.
user_choice = input('Enter Task Number: ')
organize.organize(path, files, user_choice)
print('Organized!!')
Exemple #13
0
import json
from organize import organize

# Load the JSON document and pass the decoded value straight to organize();
# the call happens while the file is still open, and its result is discarded.
with open('messed_file.json') as source:
    messed_list = json.load(source)
    organize(messed_list)