Ejemplo n.º 1
0
# number of new records collected
new_records_processed = 0

# Read OSF_PREPRINT_FILE one JSON document per line and, for every record id
# that is not yet in records_processed, append one JSON line to OUTPUT_FILE
# holding all contributor pages that could be downloaded for that record.
with open(OUTPUT_FILE, 'a') as o, open(OSF_PREPRINT_FILE, 'r') as f:
    for line in f:
        records = json.loads(line)

        for record in records['data']:
            # nothing to do for records handled earlier
            if record['id'] in records_processed:
                continue

            contributor_url = record['relationships']['contributors']['links']['related']['href']
            contributor_data = {'id': record['id'], 'data': []}

            # walk the paginated listing; stop on the first failed download
            # or once a page reports no 'next' link
            while True:
                status, data = util.download_from_url(contributor_url)
                if status != 'SUCCESS':
                    break

                contributor_data['data'].append(data)

                next_url = data['links']['next']
                if next_url is None:
                    break

                # get link to next set of records to download
                contributor_url = next_url

            # whatever was collected (possibly nothing) is written as one
            # line and flushed immediately
            json.dump(contributor_data, o)
            o.write('\n')
            o.flush()
Ejemplo n.º 2
0
def download_cash_flow(symbol, force=True):
    """Download the cash-flow file for ``symbol``.

    The source URL is ``cash_flow_url`` formatted with ``symbol``; the
    destination is ``<symbol>.cash_flow.xlsx`` inside ``data_dir``.

    Args:
        symbol: Identifier substituted into the URL template and used as the
            file-name prefix.
        force: Forwarded to ``download_from_url`` as ``overwrite``.

    Returns:
        Whatever ``download_from_url`` returns.
    """
    source_url = cash_flow_url.format(symbol)
    target_name = symbol + '.cash_flow.xlsx'
    target_path = os.path.join(data_dir, target_name)
    return download_from_url(source_url, target_path, overwrite=force)
Ejemplo n.º 3
0
def download_income_stmt(symbol, force=True):
    """Download the income-statement file for ``symbol``.

    The source URL is ``income_statement_url`` formatted with ``symbol``; the
    destination is ``<symbol>.income_stmt.xlsx`` inside ``data_dir``.

    Args:
        symbol: Identifier substituted into the URL template and used as the
            file-name prefix.
        force: Forwarded to ``download_from_url`` as ``overwrite``.

    Returns:
        Whatever ``download_from_url`` returns.
    """
    source_url = income_statement_url.format(symbol)
    target_name = symbol + '.income_stmt.xlsx'
    target_path = os.path.join(data_dir, target_name)
    return download_from_url(source_url, target_path, overwrite=force)
Ejemplo n.º 4
0
def download_balance_sheet(symbol, force=True):
    """Download the balance-sheet file for ``symbol``.

    The source URL is ``balance_sheet_url`` formatted with ``symbol``; the
    destination is ``<symbol>.balance_sheet.xlsx`` inside ``data_dir``.

    Args:
        symbol: Identifier substituted into the URL template and used as the
            file-name prefix.
        force: Forwarded to ``download_from_url`` as ``overwrite``.

    Returns:
        Whatever ``download_from_url`` returns.
    """
    source_url = balance_sheet_url.format(symbol)
    target_name = symbol + '.balance_sheet.xlsx'
    target_path = os.path.join(data_dir, target_name)
    return download_from_url(source_url, target_path, overwrite=force)
Ejemplo n.º 5
0
            if record['id'] not in records_processed:
                # Output object for this record: for every author that exposes
                # an institutions link, the downloaded pages are stored under
                # the author's id.
                # NOTE(review): the 'data' dict is never filled in the visible
                # code; per-author lists are written as top-level keys next to
                # 'id' and 'data' -- confirm this is the layout readers expect.
                institution_data = {'id': record['id'], 'data': {}}

                for author_list in record['data']:
                    for author in author_list['data']:
                        # Best-effort lookup: authors without the nested
                        # institutions link are skipped. Catch only lookup
                        # failures (missing key, or a non-subscriptable value
                        # such as None along the path) so KeyboardInterrupt and
                        # SystemExit still propagate.
                        try:
                            institution_url = author['embeds']['users'][
                                'data']['relationships']['institutions'][
                                    'links']['related']['href']
                        except (KeyError, TypeError):
                            institution_url = None

                        if institution_url is not None:
                            institution_list = []
                            # Follow the 'next' links page by page; stop on the
                            # last page or on the first failed download, keeping
                            # whatever pages were fetched so far.
                            while True:
                                status, data = util.download_from_url(
                                    institution_url)

                                if status == 'SUCCESS':
                                    institution_list.append(data)

                                    if data['links']['next'] is None:
                                        break

                                    # get link to next set of records to download
                                    institution_url = data['links']['next']
                                else:
                                    break

                            institution_data[author['id']] = institution_list

                # serialize the collected result for this record to the
                # already-open output handle
                json.dump(institution_data, o)