def test_load(self):
    """Load the fixture JSON with a new ``Result`` and check the returned frame.

    The loaded object must be a ``pandas.DataFrame`` with 44 rows and
    13 columns.
    """
    # A parenthesised single-argument print behaves the same on Python 2 and 3.
    print("TESTING: load data")
    result = Result()
    data = result.load_results(self.jsonfile)
    # The dedicated assert methods report the offending value on failure,
    # which assertTrue(a == b) cannot do.
    self.assertIsInstance(data, pandas.DataFrame)
    self.assertEqual(data.shape[0], 44)
    self.assertEqual(data.shape[1], 13)
def setUp(self):
    """Build the per-test fixtures.

    Records the install directory, creates a scratch directory, resolves
    the absolute path of the bundled results JSON, and loads that file
    into a ``Result`` instance kept on ``self.result``.
    """
    install_root = get_installdir()
    self.pwd = install_root

    # Scratch directory created fresh for every test.
    self.tmpdir = tempfile.mkdtemp()

    fixture_relative = "%s/tests/data/results/results.json" % install_root
    self.jsonfile = os.path.abspath(fixture_relative)

    loader = Result()
    self.result = loader
    loader.load_results(self.jsonfile)
def test_load(self):
    """Load the fixture JSON with a new ``Result`` and check the returned frame.

    The loaded object must be a ``pandas.DataFrame`` with 44 rows and
    13 columns.
    """
    # A parenthesised single-argument print behaves the same on Python 2 and 3.
    print("TESTING: load data")
    result = Result()
    data = result.load_results(self.jsonfile)
    # The dedicated assert methods report the offending value on failure,
    # which assertTrue(a == b) cannot do.
    self.assertIsInstance(data, pandas.DataFrame)
    self.assertEqual(data.shape[0], 44)
    self.assertEqual(data.shape[1], 13)
def download_data(data_loc, access_token=None, filters=None, battery=None,
                  save=True, url=None, file_name=None):
    """Download results, clean them, and optionally write them to disk.

    Args:
        data_loc: Directory the output file is written into when saving.
        access_token: Forwarded to ``Result``.
        filters: Forwarded to ``Result``.
        battery: When truthy, the data is narrowed with
            ``result_filter(data, battery=battery)``.
        save: When truthy, write the cleaned data under ``data_loc``.
        url: Forwarded to ``Result``.
        file_name: Output file name; defaults to ``'mturk_data.json'``.
            Names ending in ``json`` are written with ``to_json`` and names
            ending in ``pkl`` with ``to_pickle``. Any other name is not
            written and a notice is printed instead.

    Returns:
        The cleaned data with a fresh 0..n-1 index (a pandas DataFrame,
        judging by the methods used on it).
    """
    start_time = time()

    # Load results from the database
    results = Result(access_token, filters=filters, url=url)
    data = results.data

    # Flatten the nested 'experiment' records into plain columns when the
    # download did not already provide them.
    if 'experiment_exp_id' not in data.columns:
        data.loc[:, 'experiment_exp_id'] = [
            x['exp_id'] for x in data['experiment']
        ]
    if 'experiment_template' not in data.columns:
        data.loc[:, 'experiment_template'] = [
            x['template'] for x in data['experiment']
        ]

    if battery:
        data = result_filter(data, battery=battery)

    # Remove duplicates; the return value is ignored, so this presumably
    # works in place (confirm against remove_duplicates).
    remove_duplicates(data)

    # Remove a few mistakes from data
    data = data.query(
        'worker_id not in ["A254JKSDNE44AM", "A1O51P5O9MC5LX"]'
    )  # Sandbox workers
    data.reset_index(drop=True, inplace=True)

    # If saving, pick the writer from the file name's suffix.
    if save:
        if file_name is None:
            file_name = 'mturk_data.json'
        target = os.path.join(data_loc, file_name)
        if file_name.endswith('json'):
            data.to_json(target)
            print('Finished saving')
        elif file_name.endswith('pkl'):
            data.to_pickle(target)
            print('Finished saving')
        else:
            # Previously this case reported success without writing anything.
            print('Data not saved: unsupported file type for ' + file_name)

    finish_time = (time() - start_time) / 60
    print('Finished downloading data. Time taken: ' + str(finish_time))
    return data
# Mark every unwanted column so the download filters drop it
for col in drop_columns:
    filters[col] = {'drop': True}

# Strip token from specified file; the context manager closes the handle,
# which the bare open() call never did.
with open(token) as f:
    access_token = f.read().strip()

# Set up variables for the download request
battery = 'Self Regulation Retest Battery'
url = 'http://www.expfactory.org/new_api/results/62/'
file_name = 'mturk_retest_data.json'
fields = get_result_fields()

# Create results object
results = Result(access_token, filters=filters, url=url)

# Clean filters from results objects
results.clean_results(filters)

# Extract data from the results object
data = results.data

# Remainder of download_data
data = result_filter(data, battery=battery)
remove_duplicates(data)
data = data.query(
    'worker_id not in ["A254JKSDNE44AM", "A1O51P5O9MC5LX"]'
)  # Sandbox workers
data.reset_index(drop=True, inplace=True)

# Save data
from expanalysis.experiments.utils import remove_duplicates
from expanalysis.results import get_filters, Result

# default filters to clean up the downloaded results object
filters = get_filters()

# access token, obtained from expfactory.org/token
access_token = "1111111111"

# url of the battery
# for example, if the battery url is: http://expfactory.org/batteries/999/
# the url below will be: http://www.expfactory.org/new_api/results/999/
url = 'http://www.expfactory.org/new_api/results/999/'

# create a results object
results = Result(access_token, filters=filters, url=url)

# we care about the data
data = results.data

# remove duplicates - there shouldn't be any, just a safety precaution
remove_duplicates(data)

# The results.data object has one row per worker/experiment pair. The column
# "data" holds the data as a dictionary. Unfortunately, this data is in a
# 1-length list, and the data's "trialdata" is really what you want.
#
# To index the first row's data you would index the results.data object
# like so:
#
# >>> first_row_data = results.data.iloc[0]['data'][0]['trialdata']
#
# Which can then be converted into a dataframe easily:
#
# >>> first_row_data = pandas.DataFrame(first_row_data)
class TestAPI(unittest.TestCase):
    """Tests for ``check_numeric`` and for loading, filtering and extracting
    experiments from a ``Result`` built on the bundled results JSON."""

    def setUp(self):
        """Create a scratch directory and load the fixture results file."""
        self.pwd = get_installdir()
        self.tmpdir = tempfile.mkdtemp()
        self.jsonfile = os.path.abspath("%s/tests/data/results/results.json" % self.pwd)
        self.result = Result()
        self.result.load_results(self.jsonfile)

    def tearDown(self):
        """Remove the scratch directory created in ``setUp``."""
        shutil.rmtree(self.tmpdir)

    def test_check_numeric(self):
        """Numeric lists pass ``check_numeric``; string and mixed lists do not."""
        not_numeric = ["hello", "goodbye"]
        numeric_float = [1.1, 2.2, 3.3]
        numeric_int = [1, 2, 3]
        not_numeric_mixed = ["hello", 2, 3.0]
        self.assertTrue(check_numeric(numeric_float))
        self.assertTrue(check_numeric(numeric_int))
        self.assertFalse(check_numeric(not_numeric))
        self.assertFalse(check_numeric(not_numeric_mixed))

    def test_filter(self):
        """Filtering on one experiment id keeps 20 rows, all with that id."""
        filtered = self.result.filter(field="experiment_exp_id", value="bridge_game")
        self.assertEqual(filtered.shape[0], 20)
        self.assertEqual(len(numpy.unique(filtered["experiment_exp_id"])), 1)

    def test_load(self):
        """Loading the fixture yields a 44 x 13 ``pandas.DataFrame``."""
        # A parenthesised single-argument print works on Python 2 and 3.
        print("TESTING: load data")
        result = Result()
        data = result.load_results(self.jsonfile)
        self.assertIsInstance(data, pandas.DataFrame)
        self.assertEqual(data.shape[0], 44)
        self.assertEqual(data.shape[1], 13)

    def test_experiment_extract(self):
        """Extracting "stroop" gives 747 rows containing the expected columns."""
        print("TESTING: experiment extraction")
        experiment = self.result.extract_experiment(exp_id="stroop")
        experiment_columns = [u'block_duration',
                              u'condition',
                              u'correct',
                              u'correct_response',
                              u'current_trial',
                              u'dateTime',
                              u'feedback_duration',
                              u'internal_node_id',
                              u'key_press',
                              u'possible_responses',
                              u'responses',
                              u'rt',
                              u'stim_color',
                              u'stim_duration',
                              u'stim_word',
                              u'stimulus',
                              u'time_elapsed',
                              u'timing_post_trial',
                              u'trial_id',
                              u'trial_index',
                              u'trial_type',
                              u'trialdata',
                              u'uniqueid',
                              u'view_history']
        self.assertIsInstance(experiment, pandas.DataFrame)
        self.assertEqual(experiment.shape[0], 747)
        # The old form, assertTrue(x) in columns, only asserted that the
        # column name was a truthy string; check membership for real.
        for column in experiment_columns:
            self.assertIn(column, experiment.columns)

    def test_survey_extract(self):
        """Extracting "bis11_survey" returns a ``pandas.DataFrame``."""
        print("TESTING: survey extraction")
        survey = self.result.extract_experiment(exp_id="bis11_survey")
        self.assertIsInstance(survey, pandas.DataFrame)

    def test_game_extract(self):
        """Extracting "bridge_game" gives 301 rows containing the expected columns."""
        print("TESTING: game extraction")
        game = self.result.extract_experiment(exp_id="bridge_game")
        game_columns = [u'current_trial',
                        u'uniqueid',
                        u'dateTime',
                        u'ACC',
                        u'RT',
                        u'solution',
                        u'problem_id',
                        u'trial',
                        u'finished',
                        u'points',
                        u'answer',
                        u'n1',
                        u'n2',
                        u'problem']
        self.assertIsInstance(game, pandas.DataFrame)
        self.assertEqual(game.shape[0], 301)
        # Check each expected column is really present (see note in
        # test_experiment_extract about the previous vacuous assertion).
        for column in game_columns:
            self.assertIn(column, game.columns)
class TestAPI(unittest.TestCase):
    """Tests for ``check_numeric`` and for loading, filtering and extracting
    experiments from a ``Result`` built on the bundled results JSON."""

    def setUp(self):
        """Create a scratch directory and load the fixture results file."""
        self.pwd = get_installdir()
        self.tmpdir = tempfile.mkdtemp()
        self.jsonfile = os.path.abspath("%s/tests/data/results/results.json" % self.pwd)
        self.result = Result()
        self.result.load_results(self.jsonfile)

    def tearDown(self):
        """Remove the scratch directory created in ``setUp``."""
        shutil.rmtree(self.tmpdir)

    def test_check_numeric(self):
        """Numeric lists pass ``check_numeric``; string and mixed lists do not."""
        not_numeric = ["hello", "goodbye"]
        numeric_float = [1.1, 2.2, 3.3]
        numeric_int = [1, 2, 3]
        not_numeric_mixed = ["hello", 2, 3.0]
        self.assertTrue(check_numeric(numeric_float))
        self.assertTrue(check_numeric(numeric_int))
        self.assertFalse(check_numeric(not_numeric))
        self.assertFalse(check_numeric(not_numeric_mixed))

    def test_filter(self):
        """Filtering on one experiment id keeps 20 rows, all with that id."""
        filtered = self.result.filter(field="experiment_exp_id", value="bridge_game")
        self.assertEqual(filtered.shape[0], 20)
        self.assertEqual(len(numpy.unique(filtered["experiment_exp_id"])), 1)

    def test_load(self):
        """Loading the fixture yields a 44 x 13 ``pandas.DataFrame``."""
        # A parenthesised single-argument print works on Python 2 and 3.
        print("TESTING: load data")
        result = Result()
        data = result.load_results(self.jsonfile)
        self.assertIsInstance(data, pandas.DataFrame)
        self.assertEqual(data.shape[0], 44)
        self.assertEqual(data.shape[1], 13)

    def test_experiment_extract(self):
        """Extracting "stroop" gives 747 rows containing the expected columns."""
        print("TESTING: experiment extraction")
        experiment = self.result.extract_experiment(exp_id="stroop")
        experiment_columns = [
            u"block_duration",
            u"condition",
            u"correct",
            u"correct_response",
            u"current_trial",
            u"dateTime",
            u"feedback_duration",
            u"internal_node_id",
            u"key_press",
            u"possible_responses",
            u"responses",
            u"rt",
            u"stim_color",
            u"stim_duration",
            u"stim_word",
            u"stimulus",
            u"time_elapsed",
            u"timing_post_trial",
            u"trial_id",
            u"trial_index",
            u"trial_type",
            u"trialdata",
            u"uniqueid",
            u"view_history",
        ]
        self.assertIsInstance(experiment, pandas.DataFrame)
        self.assertEqual(experiment.shape[0], 747)
        # The old form, assertTrue(x) in columns, only asserted that the
        # column name was a truthy string; check membership for real.
        for column in experiment_columns:
            self.assertIn(column, experiment.columns)

    def test_survey_extract(self):
        """Extracting "bis11_survey" returns a ``pandas.DataFrame``."""
        print("TESTING: survey extraction")
        survey = self.result.extract_experiment(exp_id="bis11_survey")
        self.assertIsInstance(survey, pandas.DataFrame)

    def test_game_extract(self):
        """Extracting "bridge_game" gives 301 rows containing the expected columns."""
        print("TESTING: game extraction")
        game = self.result.extract_experiment(exp_id="bridge_game")
        game_columns = [
            u"current_trial",
            u"uniqueid",
            u"dateTime",
            u"ACC",
            u"RT",
            u"solution",
            u"problem_id",
            u"trial",
            u"finished",
            u"points",
            u"answer",
            u"n1",
            u"n2",
            u"problem",
        ]
        self.assertIsInstance(game, pandas.DataFrame)
        self.assertEqual(game.shape[0], 301)
        # Check each expected column is really present (see note in
        # test_experiment_extract about the previous vacuous assertion).
        for column in game_columns:
            self.assertIn(column, game.columns)
# Mark every unwanted column so the download filters drop it
for col in drop_columns:
    filters[col] = {'drop': True}

# Strip token from specified file; the context manager closes the handle,
# which the bare open() call never did.
with open(token) as f:
    access_token = f.read().strip()

# Set up variables for the download request
battery = 'Self Regulation Retest Battery'
url = 'http://www.expfactory.org/new_api/results/62/'
file_name = 'mturk_retest_data.json'
fields = get_result_fields()

# Create results object
results = Result(access_token, filters=filters, url=url)

# Clean filters from results objects
results.clean_results(filters)

# Extract data from the results object
data = results.data

# Remainder of download_data
data = result_filter(data, battery=battery)
remove_duplicates(data)
data = data.query(
    'worker_id not in ["A254JKSDNE44AM", "A1O51P5O9MC5LX"]'
)  # Sandbox workers
data.reset_index(drop=True, inplace=True)

# Save data
data.to_json(path.join(data_dir, file_name))