def run(self):
    """Run combine_data.combine_data for every month (1-12) of self.year.

    Progress is reported on stdout before and after each month, and at the
    start and end of the whole run.
    """
    # print(...) with one pre-built string gives the same output on
    # Python 2 and Python 3; the print statement only parses on Python 2.
    print("STARTING PROCESS " + str(self.year))
    for month in range(1, 13):
        print("***Starting to clean %d %d" % (self.year, month))
        # NOTE: a clean_text(self.year, month) step used to run here and is
        # currently disabled; only the combine step is executed.
        combine_data.combine_data(self.year, month)
        print("***Done with %d %d" % (self.year, month))
    # NOTE: the e-mail notification that followed the loop is disabled.
    print("EXITING PROCESS " + str(self.year))
def collapse_data(data_dir, incl_excl_list, n_b0s_list, sep_av_list, transform_list, roi_list):
    """
    collapse_data reads in files from a series of results_files
    and collapses across all of them so they can be plotted together

    For every combination of the entries of the five option lists, each
    B0 order returned by get_b0_orders is searched for
        <data_dir>/RESULTS/<incl_excl>/B0S_<n_b0s>/B0_ORDER_<b0_order>/
            <sep_av>/<transform>/<roi_name>_FA_MD_vol.txt
    and every file found is read with np.genfromtxt and merged into the
    running array by combine_data, together with {'b0_order': b0_order}.

    Inputs:     data_dir
                incl_excl_list
                n_b0s_list
                sep_av_list
                transform_list
                roi_list

    Output:     (data_allorders, results_allorders_dir)
                the combined data and the matching ALL_ORDERS directory path
    """
    #==========================================================================
    import os
    import numpy as np
    from glob import glob
    import itertools as it
    #--------------------------------------------------------------------------
    from combine_data import combine_data
    from get_b0_orders import get_b0_orders
    #==========================================================================

    print(' Collapsing data: B0 orders by ec volume')

    # Find all the results files in all the b0_order folders
    for incl_excl, n_b0s, sep_av, transform, roi_name in it.product(
            incl_excl_list, n_b0s_list, sep_av_list, transform_list, roi_list):

        # Start off with an empty data array
        data_allorders = None

        # np.int was only an alias of the builtin int and no longer exists
        # in NumPy >= 1.24, so call int directly.
        b0_orders = get_b0_orders(int(n_b0s))

        for b0_order in b0_orders:

            glob_string = os.path.join(data_dir, 'RESULTS', incl_excl,
                                       'B0S_{}'.format(n_b0s),
                                       'B0_ORDER_{}'.format(b0_order),
                                       sep_av, transform,
                                       '{}_FA_MD_vol.txt'.format(roi_name))

            files = glob(glob_string)

            # Extra column(s) handed to combine_data for these files
            order_tag = {'b0_order': b0_order}

            # Read in every file and combine them
            for results_file in files:
                data = np.genfromtxt(results_file, dtype=None,
                                     delimiter=' ', names=True)

                data_allorders = combine_data(data_allorders, data, order_tag)

        # Name the results dir that this is going into:
        results_allorders_dir = os.path.join(data_dir, 'RESULTS', incl_excl,
                                             'B0S_{}'.format(n_b0s),
                                             'ALL_ORDERS', sep_av, transform)

    # NOTE(review): the indentation of this function was reconstructed; the
    # return is placed after the outer loop, so the values belonging to the
    # last combination are returned - confirm against the callers.
    return data_allorders, results_allorders_dir
def one_round_of_simulation(cwd, simultaneous_worker, wait_minute):
    """Submit one batch of workers, wait for their output, then merge it.

    Args:
        cwd: directory in which the workers write ``X_training<i>.p`` and
            ``Y_training<i>.p``.
        simultaneous_worker: number of workers submitted in this round.
        wait_minute: time budget in minutes; once exceeded, the queued jobs
            are deleted with ``qdel`` and polling stops.

    Returns:
        Whatever ``combine_data(simultaneous_worker)`` returns.
    """
    # Step 1: generate the sh files and submit them.
    write_sh_file_and_submit(simultaneous_worker, cwd)

    # Step 2: poll until every worker's pair of pickles can be loaded,
    # or until the time budget is used up.
    start_time = int(time.time())
    finished_generating_data = False
    succeed = np.zeros((simultaneous_worker, 1))

    while not finished_generating_data:
        for index in range(simultaneous_worker):
            if succeed[index, 0]:
                # Already verified on an earlier pass; no need to unpickle
                # the same files again.
                continue

            x_file_name = os.path.join(cwd, "X_training" + str(index) + ".p")
            y_file_name = os.path.join(cwd, "Y_training" + str(index) + ".p")
            try:
                # The files may be missing or only partly written, so any
                # failure to load simply means "not ready yet".
                # NOTE: pickle.load executes arbitrary code from the file;
                # only safe because the files come from our own workers.
                with open(x_file_name, "rb") as x_file:
                    pickle.load(x_file)
                with open(y_file_name, "rb") as y_file:
                    pickle.load(y_file)
            except Exception:
                # Exception (not a bare except) so Ctrl-C / SystemExit can
                # still interrupt the polling loop.
                continue
            succeed[index, 0] = 1

        time.sleep(15)

        # ndarray.sum also works when there are zero workers, where the
        # nested builtin sum() would raise TypeError.
        if succeed.sum() == simultaneous_worker:
            finished_generating_data = True
            print('finished this round of simulation in normal end')

        print(' ')
        elapsed = int(time.time()) - start_time
        print('for this round of simulation, the time has elapsed this much',
              elapsed, 'seconds')
        if elapsed > 60 * wait_minute:
            print('stop this round of simulation because time constraint')
            # NOTE(review): the queue user name is hard-coded.
            subprocess.call('qdel -u zh296', shell=True)
            finished_generating_data = True

    # Step 3: append the new data to the total pickle file.
    total_good_data_so_far = combine_data(simultaneous_worker)
    return total_good_data_so_far
def Q_ec_vol_n6(data_dir, incl_excl_list, sep_av_list, transform_list, roi_list, colors, shapes):
    """
    Q_ec_vol_n6 asks the question:
        "How does the volume that you register to affect the measurement
        when you use all the data"

    It reads in all the necessary files from a series of results_files
    and collapses across all of them so they can be plotted together.
    Only the B0S_6 results are used. One call to plot_data is made for
    every combination of the entries of the four option lists.

    Inputs:     data_dir
                incl_excl_list
                sep_av_list
                transform_list
                roi_list
                colors          passed unchanged to plot_data
                shapes          passed unchanged to plot_data

    Output:     None (the combined data is handed to plot_data)
    """
    #==========================================================================
    import os
    from glob import glob
    import itertools as it
    #--------------------------------------------------------------------------
    from combine_data import combine_data
    from get_b0_orders import get_b0_orders
    from plot_data import plot_data
    from read_in_data import read_in_data
    #==========================================================================

    print(' Question: How does the choice of eddy correct volume affect the measurements?')

    # Find all the results files in all the b0_order folders
    for incl_excl, sep_av, transform, roi_name in it.product(
            incl_excl_list, sep_av_list, transform_list, roi_list):

        # Start off with an empty data array
        data_allorders = None

        # A plain int literal replaces np.int(6); np.int no longer exists
        # in NumPy >= 1.24.
        b0_orders = get_b0_orders(6)

        for b0_order in b0_orders:

            glob_string = os.path.join(data_dir, 'RESULTS', incl_excl,
                                       'B0S_6',
                                       'B0_ORDER_{}'.format(b0_order),
                                       sep_av, transform,
                                       '{}_FA_MD_vol.txt'.format(roi_name))

            files = glob(glob_string)

            # Extra column(s) handed to combine_data for these files
            order_tag = {'b0_order': b0_order}

            # Read in every file and combine them
            for results_file in files:
                data = read_in_data(results_file)

                data_allorders = combine_data(data_allorders, data, order_tag)

        # Name the results dir that this is going into:
        results_allorders_dir = os.path.join(data_dir, 'RESULTS', incl_excl,
                                             'B0S_6', 'ALL_ORDERS',
                                             sep_av, transform)

        # Now plot the data
        plot_data(data_allorders, results_allorders_dir, roi_name, colors, shapes)
if __name__ == '__main__': time_start = time.time() for i in range(LAX_ROUND): time_start_round = time.time() if i == 0: print('round:', i, 'is processing......') splite_data.splite_data(__ORIGIN_FOLDER__, __SPLITE_FOLDER__) print('Compelte splite data in round:', i) driver_origin_data_flix.driver_flix(__SPLITE_FOLDER__, __FLIXED_FOLDER__) print('Compelte flix data in round:', i) print('Deleting splite data in round:', i) os.system( "cd /home/youyizhe/TrafficDataProcesser/test_script/splite/;rm *.txt" ) combine_data.combine_data(__FLIXED_FOLDER__, __COMBINE_FOLDER__) print('Compelte combine data in round:', i) print('Deleting flixed data in round:', i) os.system( "cd /home/youyizhe/TrafficDataProcesser/test_script/flixed/;rm *.txt" ) else: print('round:', i, 'is processing......') splite_data.splite_data(__COMBINE_FOLDER__, __SPLITE_FOLDER__) print('Compelte splite data in round:', i) driver_origin_data_flix.driver_flix(__SPLITE_FOLDER__, __FLIXED_FOLDER__) print('Compelte flix data in round:', i) print('Deleting splite data in round:', i) os.system( "cd /home/youyizhe/TrafficDataProcesser/test_script/splite/;rm *.txt"
#!/home/vagrant/miniconda3/bin/python import docx from combine_data import combine_data combined_data, temperature_fail, unknown_fail = combine_data() total_tests = len(combined_data) tests_failed = len(temperature_fail) + len(unknown_fail) test_passed = total_tests - tests_failed test_fail_temp = len(temperature_fail) test_fail_unknown = len(unknown_fail) document = docx.Document() document.add_heading("Acme Test report", 0) document.add_paragraph('') test_ran_dates_str = "Tests Summary: Tests ran from: " + combined_data[0][0] + " to " + combined_data[-1][0] document.add_heading(test_ran_dates_str, level = 1) document.add_paragraph('') p = document.add_paragraph("Total number of tests: ")
#!/home/vagrant/miniconda3/bin/python import docx from combine_data import combine_data combined_data, temperature_fail, unknown_fail = combine_data() total_tests = len(combined_data) tests_failed = len(temperature_fail) + len(unknown_fail) test_passed = total_tests - tests_failed test_fail_temp = len(temperature_fail) test_fail_unknown = len(unknown_fail) document = docx.Document() document.add_heading("Acme Test report", 0) document.add_paragraph('') test_ran_dates_str = "Tests Summary: Tests ran from: " + combined_data[0][ 0] + " to " + combined_data[-1][0] document.add_heading(test_ran_dates_str, level=1) document.add_paragraph('') p = document.add_paragraph("Total number of tests: ") p.add_run(str(total_tests)).bold = True
for file_extension in file_extensions: files.extend([ obj.key for obj in s3.Bucket(manager_data['s3_bucket']).objects.all() if obj.key.startswith(f"results/{instance_id}") and obj.key.endswith(f".{file_extension}") ]) logging.info(f"Combining {len(files)} Partial Data Files") os.makedirs(f"results/{instance_id}") for file in files: response = s3.meta.client.download_file(manager_data['s3_bucket'], file, file) fileout = f"results/{instance_id}_{output_file}" combine_data(files, fileout) logging.info(f"Uploading combined data file '{fileout}' to S3 bucket") response = s3.meta.client.upload_file(fileout, manager_data['s3_bucket'], f"{fileout}") for file in files: try: os.remove(file) response = s3.meta.client.delete_object( Bucket=manager_data['s3_bucket'], Key=file) except FileNotFoundError: pass log_files = [ obj.key for obj in s3.Bucket(manager_data['s3_bucket']).objects.all() if
def Q_n_b0s(data_dir, incl_excl_list, sep_av_list, transform_list, roi_list, ec_b0_list, colors, shapes):
    """
    Q_n_b0s asks the question:
        "How does the number of B0s you include change your measurement?"

    It reads in all the necessary files from a series of results_files
    and collapses across all of them so they can be plotted together.

    Two passes are made over the B0S_1 ... B0S_6 results:
      1. once per entry of ec_b0_list, keeping only the B0 orders for which
         order[:2] equals that entry (results dir ALL_B0S/B0_<ec_b0>);
      2. once with every B0 order (results dir ALL_B0S/ALL_B0S).
    plot_data is called once for every combination in each pass.

    Inputs:     data_dir
                incl_excl_list
                sep_av_list
                transform_list
                roi_list
                ec_b0_list      values compared with the start of each B0 order
                colors          passed unchanged to plot_data
                shapes          passed unchanged to plot_data

    Output:     None (the combined data is handed to plot_data)
    """
    #==========================================================================
    import os
    from glob import glob
    import itertools as it
    #--------------------------------------------------------------------------
    from combine_data import combine_data
    from get_b0_orders import get_b0_orders
    from plot_data import plot_data
    from read_in_data import read_in_data
    #==========================================================================

    print(' Question: How does the number of B0s change your measurement')

    # Find all the results files in all the b0_order folders
    for incl_excl, sep_av, transform, roi_name, ec_b0 in it.product(
            incl_excl_list, sep_av_list, transform_list, roi_list, ec_b0_list):

        # Start off with an empty data array
        data_allorders_allb0s = None

        for n_b0s in range(1, 7):
            # n_b0s is already an int; np.int (removed in NumPy >= 1.24)
            # is not needed.
            b0_orders = get_b0_orders(n_b0s)
            # Keep only the orders that start with the requested volume
            b0_orders = [order for order in b0_orders if order[:2] == ec_b0]

            for b0_order in b0_orders:

                glob_string = os.path.join(data_dir, 'RESULTS', incl_excl,
                                           'B0S_{}'.format(n_b0s),
                                           'B0_ORDER_{}'.format(b0_order),
                                           sep_av, transform,
                                           '{}_FA_MD_vol.txt'.format(roi_name))

                files = glob(glob_string)

                # Extra column(s) handed to combine_data for these files
                file_tags = {'b0_order': b0_order, 'n_b0s': n_b0s}

                # Read in every file and combine them
                for results_file in files:
                    data = read_in_data(results_file)

                    data_allorders_allb0s = combine_data(
                        data_allorders_allb0s, data, file_tags)

        # Name the results dir that this is going into:
        results_allorders_allb0s_dir = os.path.join(data_dir, 'RESULTS',
                                                    incl_excl, 'ALL_B0S',
                                                    'B0_{}'.format(ec_b0),
                                                    sep_av, transform)

        # Now plot the data
        plot_data(data_allorders_allb0s, results_allorders_allb0s_dir,
                  roi_name, colors, shapes)

    # Now do the same thing, but with REALLY all the B0s
    # Find all the results files in all the b0_order folders
    for incl_excl, sep_av, transform, roi_name in it.product(
            incl_excl_list, sep_av_list, transform_list, roi_list):

        # Start off with an empty data array
        data_allorders_allb0s = None

        for n_b0s in range(1, 7):
            b0_orders = get_b0_orders(n_b0s)

            for b0_order in b0_orders:

                glob_string = os.path.join(data_dir, 'RESULTS', incl_excl,
                                           'B0S_{}'.format(n_b0s),
                                           'B0_ORDER_{}'.format(b0_order),
                                           sep_av, transform,
                                           '{}_FA_MD_vol.txt'.format(roi_name))

                files = glob(glob_string)

                file_tags = {'b0_order': b0_order, 'n_b0s': n_b0s}

                # Read in every file and combine them
                for results_file in files:
                    data = read_in_data(results_file)

                    data_allorders_allb0s = combine_data(
                        data_allorders_allb0s, data, file_tags)

        # Name the results dir that this is going into:
        results_allorders_allb0s_dir = os.path.join(data_dir, 'RESULTS',
                                                    incl_excl, 'ALL_B0S',
                                                    'ALL_B0S',
                                                    sep_av, transform)

        # Now plot the data
        plot_data(data_allorders_allb0s, results_allorders_allb0s_dir,
                  roi_name, colors, shapes)
def collapse_data(data_dir, incl_excl_list, n_b0s_list, sep_av_list, transform_list, roi_list):
    """
    collapse_data reads in files from a series of results_files
    and collapses across all of them so they can be plotted together

    For every combination of the entries of the five option lists, each
    B0 order returned by get_b0_orders is searched for
        <data_dir>/RESULTS/<incl_excl>/B0S_<n_b0s>/B0_ORDER_<b0_order>/
            <sep_av>/<transform>/<roi_name>_FA_MD_vol.txt
    and every file found is read with np.genfromtxt and merged into the
    running array by combine_data, together with {'b0_order': b0_order}.

    Inputs:     data_dir
                incl_excl_list
                n_b0s_list
                sep_av_list
                transform_list
                roi_list

    Output:     (data_allorders, results_allorders_dir)
                the combined data and the matching ALL_ORDERS directory path
    """
    #==========================================================================
    import os
    import numpy as np
    from glob import glob
    import itertools as it
    #--------------------------------------------------------------------------
    from combine_data import combine_data
    from get_b0_orders import get_b0_orders
    #==========================================================================

    print(' Collapsing data: B0 orders by ec volume')

    # Find all the results files in all the b0_order folders
    for incl_excl, n_b0s, sep_av, transform, roi_name in it.product(
            incl_excl_list, n_b0s_list, sep_av_list, transform_list, roi_list):

        # Start off with an empty data array
        data_allorders = None

        # np.int was only an alias of the builtin int and no longer exists
        # in NumPy >= 1.24, so call int directly.
        b0_orders = get_b0_orders(int(n_b0s))

        for b0_order in b0_orders:

            glob_string = os.path.join(data_dir, 'RESULTS', incl_excl,
                                       'B0S_{}'.format(n_b0s),
                                       'B0_ORDER_{}'.format(b0_order),
                                       sep_av, transform,
                                       '{}_FA_MD_vol.txt'.format(roi_name))

            files = glob(glob_string)

            # Extra column(s) handed to combine_data for these files
            order_tag = {'b0_order': b0_order}

            # Read in every file and combine them
            for results_file in files:
                data = np.genfromtxt(results_file, dtype=None,
                                     delimiter=' ', names=True)

                data_allorders = combine_data(data_allorders, data, order_tag)

        # Name the results dir that this is going into:
        results_allorders_dir = os.path.join(data_dir, 'RESULTS', incl_excl,
                                             'B0S_{}'.format(n_b0s),
                                             'ALL_ORDERS', sep_av, transform)

    # NOTE(review): the indentation of this function was reconstructed; the
    # return is placed after the outer loop, so the values belonging to the
    # last combination are returned - confirm against the callers.
    return data_allorders, results_allorders_dir
def Q_ec_vol_n6(data_dir, incl_excl_list, sep_av_list, transform_list, roi_list, colors, shapes):
    """
    Q_ec_vol_n6 asks the question:
        "How does the volume that you register to affect the measurement
        when you use all the data"

    It reads in all the necessary files from a series of results_files
    and collapses across all of them so they can be plotted together.
    Only the B0S_6 results are used. One call to plot_data is made for
    every combination of the entries of the four option lists.

    Inputs:     data_dir
                incl_excl_list
                sep_av_list
                transform_list
                roi_list
                colors          passed unchanged to plot_data
                shapes          passed unchanged to plot_data

    Output:     None (the combined data is handed to plot_data)
    """
    #==========================================================================
    import os
    from glob import glob
    import itertools as it
    #--------------------------------------------------------------------------
    from combine_data import combine_data
    from get_b0_orders import get_b0_orders
    from plot_data import plot_data
    from read_in_data import read_in_data
    #==========================================================================

    print(' Question: How does the choice of eddy correct volume affect the measurements?')

    # Find all the results files in all the b0_order folders
    for incl_excl, sep_av, transform, roi_name in it.product(
            incl_excl_list, sep_av_list, transform_list, roi_list):

        # Start off with an empty data array
        data_allorders = None

        # A plain int literal replaces np.int(6); np.int no longer exists
        # in NumPy >= 1.24.
        b0_orders = get_b0_orders(6)

        for b0_order in b0_orders:

            glob_string = os.path.join(data_dir, 'RESULTS', incl_excl,
                                       'B0S_6',
                                       'B0_ORDER_{}'.format(b0_order),
                                       sep_av, transform,
                                       '{}_FA_MD_vol.txt'.format(roi_name))

            files = glob(glob_string)

            # Extra column(s) handed to combine_data for these files
            order_tag = {'b0_order': b0_order}

            # Read in every file and combine them
            for results_file in files:
                data = read_in_data(results_file)

                data_allorders = combine_data(data_allorders, data, order_tag)

        # Name the results dir that this is going into:
        results_allorders_dir = os.path.join(data_dir, 'RESULTS', incl_excl,
                                             'B0S_6', 'ALL_ORDERS',
                                             sep_av, transform)

        # Now plot the data
        plot_data(data_allorders, results_allorders_dir, roi_name, colors, shapes)
def Q_n_b0s(data_dir, incl_excl_list, sep_av_list, transform_list, roi_list, ec_b0_list, colors, shapes):
    """
    Q_n_b0s asks the question:
        "How does the number of B0s you include change your measurement?"

    It reads in all the necessary files from a series of results_files
    and collapses across all of them so they can be plotted together.

    Two passes are made over the B0S_1 ... B0S_6 results:
      1. once per entry of ec_b0_list, keeping only the B0 orders for which
         order[:2] equals that entry (results dir ALL_B0S/B0_<ec_b0>);
      2. once with every B0 order (results dir ALL_B0S/ALL_B0S).
    plot_data is called once for every combination in each pass.

    Inputs:     data_dir
                incl_excl_list
                sep_av_list
                transform_list
                roi_list
                ec_b0_list      values compared with the start of each B0 order
                colors          passed unchanged to plot_data
                shapes          passed unchanged to plot_data

    Output:     None (the combined data is handed to plot_data)
    """
    #==========================================================================
    import os
    from glob import glob
    import itertools as it
    #--------------------------------------------------------------------------
    from combine_data import combine_data
    from get_b0_orders import get_b0_orders
    from plot_data import plot_data
    from read_in_data import read_in_data
    #==========================================================================

    print(' Question: How does the number of B0s change your measurement')

    # Find all the results files in all the b0_order folders
    for incl_excl, sep_av, transform, roi_name, ec_b0 in it.product(
            incl_excl_list, sep_av_list, transform_list, roi_list, ec_b0_list):

        # Start off with an empty data array
        data_allorders_allb0s = None

        for n_b0s in range(1, 7):
            # n_b0s is already an int; np.int (removed in NumPy >= 1.24)
            # is not needed.
            b0_orders = get_b0_orders(n_b0s)
            # Keep only the orders that start with the requested volume
            b0_orders = [order for order in b0_orders if order[:2] == ec_b0]

            for b0_order in b0_orders:

                glob_string = os.path.join(data_dir, 'RESULTS', incl_excl,
                                           'B0S_{}'.format(n_b0s),
                                           'B0_ORDER_{}'.format(b0_order),
                                           sep_av, transform,
                                           '{}_FA_MD_vol.txt'.format(roi_name))

                files = glob(glob_string)

                # Extra column(s) handed to combine_data for these files
                file_tags = {'b0_order': b0_order, 'n_b0s': n_b0s}

                # Read in every file and combine them
                for results_file in files:
                    data = read_in_data(results_file)

                    data_allorders_allb0s = combine_data(
                        data_allorders_allb0s, data, file_tags)

        # Name the results dir that this is going into:
        results_allorders_allb0s_dir = os.path.join(data_dir, 'RESULTS',
                                                    incl_excl, 'ALL_B0S',
                                                    'B0_{}'.format(ec_b0),
                                                    sep_av, transform)

        # Now plot the data
        plot_data(data_allorders_allb0s, results_allorders_allb0s_dir,
                  roi_name, colors, shapes)

    # Now do the same thing, but with REALLY all the B0s
    # Find all the results files in all the b0_order folders
    for incl_excl, sep_av, transform, roi_name in it.product(
            incl_excl_list, sep_av_list, transform_list, roi_list):

        # Start off with an empty data array
        data_allorders_allb0s = None

        for n_b0s in range(1, 7):
            b0_orders = get_b0_orders(n_b0s)

            for b0_order in b0_orders:

                glob_string = os.path.join(data_dir, 'RESULTS', incl_excl,
                                           'B0S_{}'.format(n_b0s),
                                           'B0_ORDER_{}'.format(b0_order),
                                           sep_av, transform,
                                           '{}_FA_MD_vol.txt'.format(roi_name))

                files = glob(glob_string)

                file_tags = {'b0_order': b0_order, 'n_b0s': n_b0s}

                # Read in every file and combine them
                for results_file in files:
                    data = read_in_data(results_file)

                    data_allorders_allb0s = combine_data(
                        data_allorders_allb0s, data, file_tags)

        # Name the results dir that this is going into:
        results_allorders_allb0s_dir = os.path.join(data_dir, 'RESULTS',
                                                    incl_excl, 'ALL_B0S',
                                                    'ALL_B0S',
                                                    sep_av, transform)

        # Now plot the data
        plot_data(data_allorders_allb0s, results_allorders_allb0s_dir,
                  roi_name, colors, shapes)