コード例 #1
0
 def test_organize_same_dir(self):
     """Organize ``testDirMany`` into itself and check where the files land.

     The file table is captured before ``organize`` runs; afterwards every
     file name recorded in it must exist as a regular file under
     ``<cwd>/testDirMany/ENEE408A/HOMEWORK1``.
     """
     table = get_file_paths_table("testDirMany")
     organize("testDirMany", "testDirMany")
     target_dir = os.path.join(
         os.getcwd(), "testDirMany", "ENEE408A", "HOMEWORK1")
     # Flatten the per-directory name lists into one stream of file names.
     all_names = (name for names in table.values() for name in names)
     for name in all_names:
         candidate = os.path.join(target_dir, name)
         self.assertTrue(os.path.exists(candidate))
         self.assertTrue(os.path.isfile(candidate))
コード例 #2
0
 def test_organize_multiple_subdir(self):
     """Organize ``testDirMany`` in place after adding a second homework set.

     Ten ``ENEE408A_HOMEWORK2_`` files are generated first, then the file
     table is captured and ``organize`` is run. Every (path, file) pair
     recorded in the table must afterwards exist as a regular file under
     ``<cwd>/testDirMany``.
     """
     generate_files("testDirMany", "ENEE408A_HOMEWORK2_", numFiles=10)
     table = get_file_paths_table("testDirMany")
     organize("testDirMany", "testDirMany")
     root = os.path.join(os.getcwd(), "testDirMany")
     for rel_dir, names in table.items():
         parent = os.path.join(root, rel_dir)
         for name in names:
             located = os.path.join(parent, name)
             self.assertTrue(os.path.exists(located))
             self.assertTrue(os.path.isfile(located))
コード例 #3
0
ファイル: main.py プロジェクト: IreneGaoc/University-project
def main():
    """Organize the raw CSV dataset, load it, and request the learning data.

    Steps, in order: run ``organize.organize()``, create a ``DataManager``,
    feed it the three CSV files found under ``DATA_DIR``, call its feature
    engineering hook, and unpack the train/test split it returns.
    """
    # Reorganize the original CSV dataset into new dataset files:
    #
    #   dataset
    #     |-- profile.csv
    #     |-- transactions.csv
    #     |-- dependent.csv
    organize.organize()

    # Hand the organized dataset over to the data manager.
    manager = DataManager()

    profile_csv = DATA_DIR + 'profile.csv'
    transactions_csv = DATA_DIR + 'transactions.csv'
    dependent_csv = DATA_DIR + 'dependent.csv'

    manager.read_profile(profile_csv)
    manager.read_transactions(transactions_csv)
    manager.read_dependent(dependent_csv)

    # Feature selection (NOT IMPLEMENTED).
    manager.feature_engineering()

    # Training and test sets (NOT IMPLEMENTED).
    A_train, y_train, A_test, y_test = manager.get_learning_data()
コード例 #4
0
ファイル: test_organize.py プロジェクト: lethain/organize
 def test_excel(self):
     """Test parsing an Excel.

     The first row produced by ``organize`` for
     ``excel/simple_whitespace.xlsx`` must equal the expected list of
     (header, value) pairs.
     """
     filename = 'excel/simple_whitespace.xlsx'
     first_line = [
         (u'Year', 1901),
         (u'Increase', 1),
         (u'Decrease', 14),
         (u'Grade', u'A'),
     ]
     with self.file_handle(filename) as fh:
         lines = organize(fh)
         # Builtin next() works on Python 2 and 3 iterators alike; the
         # ``.next()`` method only exists on Python 2.
         first = list(next(lines))
         # assertEqual is the supported spelling; the assertEquals alias
         # was removed in Python 3.12.
         self.assertEqual(first, first_line)
コード例 #5
0
ファイル: test_organize.py プロジェクト: lethain/organize
 def test_tsv(self):
     """Test parsing a TSV.

     The first row produced by ``organize`` for ``tsv/imf_disb_repay.tsv``
     must equal the expected list of (header, value) pairs.
     """
     filename = 'tsv/imf_disb_repay.tsv'
     first_line = [
         (u'Year', u'2014'),
         (u'GRA Disbursements', u'2,765,465,000'),
         (u'GRA Repurchases', u'5,427,052,823'),
         (u'GRA Charges Paid', u'584,358,716'),
         (u'PRGT Disbursements', u'63,266,571'),
         (u'PRGT Repayments', u'132,553,847'),
         (u'PRGT Interest Paid', u'0'),
         (u'Total Disbursements', u'2,828,731,571'),
         (u'Total Repayments', u'5,559,606,670'),
         (u'Total Charges and Interest', u'584,358,716'),
     ]
     with self.file_handle(filename) as fh:
         lines = organize(fh)
         # Builtin next() works on Python 2 and 3 iterators alike; the
         # ``.next()`` method only exists on Python 2.
         first = list(next(lines))
         # assertEqual is the supported spelling; the assertEquals alias
         # was removed in Python 3.12.
         self.assertEqual(first, first_line)
コード例 #6
0
ファイル: test_organize.py プロジェクト: lethain/organize
    def test_to_dict(self):
        """Test transforming into dicts as described in README.

        The first row produced by ``organize`` for
        ``tsv/imf_disb_repay.tsv`` is turned into a dict and compared with
        the expected header -> value mapping.
        """
        filename = 'tsv/imf_disb_repay.tsv'
        first_dict = {
            u'PRGT Interest Paid': u'0',
            u'GRA Disbursements': u'2,765,465,000',
            u'GRA Charges Paid': u'584,358,716',
            u'Total Charges and Interest': u'584,358,716',
            u'PRGT Disbursements': u'63,266,571',
            u'Total Repayments': u'5,559,606,670',
            u'Year': u'2014',
            u'PRGT Repayments': u'132,553,847',
            u'GRA Repurchases': u'5,427,052,823',
            u'Total Disbursements': u'2,828,731,571',
        }

        with self.file_handle(filename) as fh:
            lines = organize(fh)
            # Builtin next() works on Python 2 and 3 iterators alike; the
            # ``.next()`` method only exists on Python 2.
            first = dict(list(next(lines)))
            # assertEqual is the supported spelling; the assertEquals alias
            # was removed in Python 3.12.
            self.assertEqual(first, first_dict)
コード例 #7
0
ファイル: test_organize.py プロジェクト: lethain/organize
    def test_csv(self):
        """Test parsing a CSV.

        The first row produced by ``organize`` for
        ``csv/worldbank_preamble.csv`` must equal the expected list of
        (header, value) pairs.
        """
        filename = 'csv/worldbank_preamble.csv'
        first_line = [
            (u'Country', u'Belarus'), (u'Year', u'2000'),
            (u'CO2 emissions (metric tons per capita)', u'5.91'),
            (u'Electric power consumption (kWh per capita)', u'2988.71'),
            (u'Energy use (kg of oil equivalent per capita)', u'2459.67'),
            (u'Fertility rate, total (births per woman)', u'1.29'),
            (u'GNI per capita, Atlas method (current US$)', u'1.38E+03'),
            (u'Internet users (per 1,000 people)', u'18.69'),
            (u'Life expectancy at birth, total (years)', u'68.01'),
            (u'Military expenditure (% of GDP)', u'1.26'),
            (u'Population, total', u'1.00E+07'),
            (u'Prevalence of HIV, total (% of population ages 15-49)', u''),
        ]

        with self.file_handle(filename) as fh:
            lines = organize(fh)
            # Builtin next() works on Python 2 and 3 iterators alike; the
            # ``.next()`` method only exists on Python 2.
            first = list(next(lines))
            # assertEqual is the supported spelling; the assertEquals alias
            # was removed in Python 3.12.
            self.assertEqual(first, first_line)
コード例 #8
0
def gather():
    """Fetch articles for every source, organize them, and write ``now.txt``.

    For each entry in ``srcs`` the request parameters in ``data`` are pointed
    at that source and the NewsAPI ``everything`` endpoint is queried. Each
    returned article's URL is passed to a ``<Name>_text_getter`` callable
    looked up by name; articles for which it returns a falsy value are
    dropped. The kept records go through ``organize.organize`` and the
    result is written to ``now.txt`` as JSON.
    """
    collected = []
    for source in tqdm(srcs):
        # NOTE: this updates the ``data`` object defined outside the function.
        data['sources'] = source
        response = requests.get("https://newsapi.org/v2/everything",
                                headers=headers,
                                params=data)
        for article in tqdm(response.json()['articles']):
            # Resolve the scraper by name: "bbc-news" -> BbcNews_text_getter.
            # NOTE(review): eval() on a name built from the source id is only
            # acceptable while ``srcs`` is a trusted list -- confirm.
            fetch_text = eval(source.title().replace("-", "") + "_text_getter")
            body = fetch_text(article['url'])
            if not body:
                continue
            # Keep only the fields of interest in a fresh dict.
            collected.append({
                "title": article['title'],
                "blurb": article['description'],
                "source": article['source']['name'],
                "pubdate": article['publishedAt'],
                "link": article['url'],
                "text": body
            })
    organized = organize.organize(collected)
    with open('now.txt', 'w') as out:
        out.write(json.dumps(organized))
コード例 #9
0
 def test_organize_diff_loc(self):
     """Organize ``testDir`` into ``newTestDir`` and check one expected file.

     After ``organize`` runs,
     ``<cwd>/newTestDir/ENEE408A/HOMEWORK1/ENEE408A_HOMEWORK1_0.txt`` must
     be a regular file.
     """
     organize("testDir", "newTestDir")
     expected = os.path.join(
         os.getcwd(),
         "newTestDir",
         "ENEE408A",
         "HOMEWORK1",
         "ENEE408A_HOMEWORK1_0.txt",
     )
     self.assertTrue(os.path.isfile(expected))
コード例 #10
0
        if isfile(filepath):
            _, ext = splitext(filepath)
            if ext in exts:
                yield filepath


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Transform data files in directory into CSV.')
    parser.add_argument('src_dir', metavar='src_dir', help='path to transform')
    parser.add_argument('dest_dir', metavar='dest_dir', help='path to transform')
    args = parser.parse_args()

    for filepath in list_filepaths(args.src_dir):
        _, filename = split(filepath)
        with open(filepath, 'r') as fin:
            rows = organize(fin, filename=filename)
            dest = join(args.dest_dir, filename)
            print "Writing %s to %s" % (filepath, dest)
            with open(dest, 'w') as fout:
                writer = csv.writer(fout)
                first = list(islice(rows, 1))
                if len(first):
                    first = list(first[0])

                    # write header row based on first row
                    writer.writerow([x[0] for x in first])
                    # write value row based on first row
                    writer.writerow([x[1] for x in first])

                    # for remaining rows just write the data
                    for row in rows:
コード例 #11
0
ファイル: with_pandas.py プロジェクト: lethain/organize
from organize import organize
import pandas as pd

# Build a DataFrame from the records that organize() yields for one file.
filename = 'your_file.csv'
with open(filename, 'r') as fin:
    # organize() receives the open handle plus the file name; its output is
    # consumed by DataFrame.from_records while the file is still open.
    df = pd.DataFrame.from_records(organize(fin, filename=filename))
    # A parenthesized single argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print(df)
コード例 #12
0
ファイル: main.py プロジェクト: Nisarg1901/file_organizer
# Show the banner first; the project modules are imported only afterwards.
print('Welcome to File Organizer v0.0.1')
print('Loading...')
print()

import inputs
import organize

# Ask for the directory, let organize.basic_work() process it, then show
# the task menu.
path = input('Enter PATH of Directory which you want to Organize: ')
files = organize.basic_work(path)
inputs.menu()

# The chosen task number is passed on as the raw string typed by the user.
user_choice = input('Enter Task Number: ')
organize.organize(path, files, user_choice)
print('Organized!!')
コード例 #13
0
ファイル: main.py プロジェクト: dev-natalia/DeeperSystemsTest
import json
from organize import organize

# Parse the JSON document and pass the loaded data to organize() while the
# file is still open.
with open('messed_file.json') as source:
    messed_list = json.load(source)
    organize(messed_list)