def setUp(self):
    """Prepare each test: locate the install dir, make a scratch temp
    directory, and load the bundled results fixture into a Result."""
    self.pwd = get_installdir()
    self.tmpdir = tempfile.mkdtemp()
    # Path to the JSON fixture shipped with the test data
    fixture = "%s/tests/data/results/results.json" % self.pwd
    self.jsonfile = os.path.abspath(fixture)
    self.result = Result()
    self.result.load_results(self.jsonfile)
def test_load(self):
    """load_results should yield a 44-row, 13-column pandas DataFrame."""
    # Fixed: was a Python-2 print statement (`print "..."`), a syntax
    # error on Python 3 and inconsistent with print() calls elsewhere.
    print("TESTING: load data")
    result = Result()
    data = result.load_results(self.jsonfile)
    self.assertTrue(isinstance(data, pandas.DataFrame))
    self.assertTrue(data.shape[0] == 44)
    self.assertTrue(data.shape[1] == 13)
def download_data(data_loc, access_token=None, filters=None, battery=None,
                  save=True, url=None, file_name=None):
    """Download results from the database, clean them, and optionally save.

    Parameters:
        data_loc: directory in which to save the output file
        access_token: API token passed through to Result
        filters: filters passed through to Result
        battery: if given, restrict data to this battery via result_filter
        save: when truthy, write the cleaned data to data_loc
        url: results endpoint passed through to Result
        file_name: output name; defaults to 'mturk_data.json'. A name
            ending in 'json' is written with to_json, one ending in 'pkl'
            with to_pickle.

    Returns:
        The cleaned pandas DataFrame.
    """
    start_time = time()
    # Load results from the database
    results = Result(access_token, filters=filters, url=url)
    data = results.data
    # Flatten the nested 'experiment' dicts into flat columns when absent
    if 'experiment_exp_id' not in data.columns:
        data.loc[:, 'experiment_exp_id'] = [
            x['exp_id'] for x in data['experiment']
        ]
    if 'experiment_template' not in data.columns:
        data.loc[:, 'experiment_template'] = [
            x['template'] for x in data['experiment']
        ]
    if battery:
        data = result_filter(data, battery=battery)
    # Remove duplicates (mutates data in place — presumably; confirm
    # against remove_duplicates, whose return value is ignored here)
    remove_duplicates(data)
    # Remove a few mistakes from data: known sandbox workers
    data = data.query(
        'worker_id not in ["A254JKSDNE44AM", "A1O51P5O9MC5LX"]')
    data.reset_index(drop=True, inplace=True)
    # If saving, write the data out in the format implied by file_name
    if save:  # fixed: was `save == True` (non-idiomatic comparison)
        if file_name is None:  # fixed: was `file_name == None`
            file_name = 'mturk_data.json'
        # endswith() is clearer and safer than slicing fixed suffixes
        if file_name.endswith('json'):
            data.to_json(os.path.join(data_loc, file_name))
        elif file_name.endswith('pkl'):
            data.to_pickle(os.path.join(data_loc, file_name))
        print('Finished saving')
    finish_time = (time() - start_time) / 60
    print('Finished downloading data. Time taken: ' + str(finish_time))
    return data
for col in drop_columns:
    filters[col] = {'drop': True}
# Strip token from specified file.
# Fixed: the file handle was opened with open() and never closed
# (resource leak); a `with` block guarantees closure.
with open(token) as f:
    access_token = f.read().strip()
# Set up variables for the download request
battery = 'Self Regulation Retest Battery'
url = 'http://www.expfactory.org/new_api/results/62/'
file_name = 'mturk_retest_data.json'
fields = get_result_fields()
# Create results object
results = Result(access_token, filters=filters, url=url)
# Clean filters from results objects
results.clean_results(filters)
# Extract data from the results object
data = results.data
# Remainder of download_data: battery filter, dedupe, drop sandbox workers
data = result_filter(data, battery=battery)
remove_duplicates(data)
data = data.query(
    'worker_id not in ["A254JKSDNE44AM", "A1O51P5O9MC5LX"]')  # Sandbox workers
data.reset_index(drop=True, inplace=True)
# Save data