def preprocess_all():
    bucket = util.get_bucket(config.S3_BUCKET_BATCH_RAW)
    for csv_obj in bucket.objects.all():
        preprocess_file(config.S3_BUCKET_BATCH_RAW, csv_obj.key)
        print(colored(
            "Finished preprocessing file s3a://{0}/{1}".format(
                config.S3_BUCKET_BATCH_RAW, csv_obj.key),
            "green"))
def run(self):
    producer = kafka.KafkaProducer(bootstrap_servers=config.KAFKA_SERVERS)
    bucket = util.get_bucket(config.S3_BUCKET_STREAM)
    for json_obj in bucket.objects.all():
        json_file = "s3://{0}/{1}".format(config.S3_BUCKET_STREAM, json_obj.key)
        for line in smart_open.smart_open(json_file):
            if config.LOG_DEBUG:
                print(line)
            time.sleep(config.KAFKA_PRODUCER_RATE)
            producer.send(config.KAFKA_TOPIC, line)
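# A minimal consumer sketch for verifying the stream locally, using
# kafka-python's KafkaConsumer. It assumes the same config.KAFKA_SERVERS and
# config.KAFKA_TOPIC settings the producer above uses; nothing here is part
# of the original module.
import kafka

consumer = kafka.KafkaConsumer(
    config.KAFKA_TOPIC,
    bootstrap_servers=config.KAFKA_SERVERS,
    auto_offset_reset='earliest')
for message in consumer:
    # Each message.value is one raw JSON line sent by run() above.
    print(message.value)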
def smart_size(url):
    """Return the size in bytes of a local file or s3:// object."""
    result = urllib.parse.urlparse(url)
    if result.scheme in ['', 'file']:
        return os.path.getsize(url)
    elif result.scheme == 's3':
        try:
            response = get_s3client_for_bucket(url).head_object(
                Bucket=get_bucket(result), Key=get_key(result))
            return response['ContentLength']
        except Exception:
            return False
def smart_etag(url) -> str:
    """Return a change marker: the S3 ETag, or the mtime for local files."""
    result = urllib.parse.urlparse(url)
    if result.scheme in ['', 'file']:
        # Local files have no ETag; the modification time serves instead.
        return str(os.path.getmtime(url))
    elif result.scheme == 's3':
        try:
            response = get_s3client_for_bucket(url).head_object(
                Bucket=get_bucket(result), Key=get_key(result))
            # The ETag header value is wrapped in quotes; strip them.
            return response['ETag'][1:-1]
        except Exception:
            return False
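# A usage sketch for the two helpers above; the URL is illustrative, not
# from the source. The same call works for a local path or an s3:// URL.
url = 's3://my-bucket/data/input.csv'
size = smart_size(url)
if size:
    print('%s is %d bytes (etag %s)' % (url, size, smart_etag(url)))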
def smart_move(source, destination):
    result = urllib.parse.urlparse(destination)
    if result.scheme in ['', 'file']:
        with smart_open.smart_open(source, 'rb') as src:
            with smart_open.smart_open(destination, 'wb') as dst:
                shutil.copyfileobj(src, dst)
    elif result.scheme == 's3':
        get_s3client_for_bucket(destination).upload_file(
            source, get_bucket(result), get_key(result),
            Callback=ProgressPercentage(smart_size(source), source))
    # A move removes the source once the copy has completed.
    os.remove(source)
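# boto3 invokes upload_file's Callback repeatedly with the number of bytes
# transferred in each chunk. ProgressPercentage is not shown in this section;
# the sketch below is a minimal, assumed implementation consistent with how
# smart_move constructs it (total size plus a label).
import threading

class ProgressPercentage:
    def __init__(self, size, name):
        self._size = size
        self._name = name
        self._seen = 0
        # boto3 may call back from several worker threads, hence the lock.
        self._lock = threading.Lock()

    def __call__(self, bytes_amount):
        with self._lock:
            self._seen += bytes_amount
            pct = (self._seen / self._size) * 100 if self._size else 100
            print('\r%s  %.1f%%' % (self._name, pct), end='')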
def upload_files(ctx, record_id, fname):
    """Upload files to an existing record."""
    # Get either the sandbox or the api token to connect
    token = ctx.obj['token']
    zen_log = ctx.obj['log']
    # Get the bucket_url for the record
    bucket_url = get_bucket(ctx.obj['url'], token, record_id)
    # Read the file paths from file
    with open(fname) as f:
        file_paths = f.readlines()
    # Upload all files to the record, one by one
    for f in file_paths:
        f = f.replace('\n', '')
        zen_log.info(f"Uploading {f} ...")
        status = upload_file(bucket_url, token, record_id, f)
        zen_log.info(f"Request status: {status}")
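# upload_files expects fname to name a plain text file with one path per
# line; the contents below are illustrative only:
#
#   /data/results/run01.nc
#   /data/results/run02.nc
#
# Each line is stripped of its newline and handed to upload_file() in turn.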
def dist_generator(particles, energy, machine, fort13, jobs, factor,
                   emittance_x, emittance_y, alpha_x, alpha_y, beta_x, beta_y,
                   offset_x, offset_xp, offset_y, offset_yp,
                   dispersion_x, dispersion_y, bunch, spread, seed):
    job_str = '%s' % jobs

    # Getting the transverse sigmas (amplitudes of the phase space ellipse)
    # --------------------------------------------------------------------
    gamma_rel, beta_rel, p0, mass = get_rel_params(energy)
    tx_max, txp_max = get_sigmas(alpha_x, beta_x, emittance_x, dispersion_x,
                                 spread, beta_rel, gamma_rel)
    ty_max, typ_max = get_sigmas(alpha_y, beta_y, emittance_y, dispersion_y,
                                 spread, beta_rel, gamma_rel)

    # Seeding
    # --------------------------------------------------------------------
    if seed == 0:
        myseed = random.randint(0, 429496729)
    else:
        myseed = seed
    with open('seed.txt', 'a') as g:
        print('job ', job_str, 'seed ', myseed, file=g)
    np.random.seed(myseed)
    random.seed(myseed)

    # Generating the transverse distribution
    # --------------------------------------------------------------------
    x_t = np.asarray(np.random.normal(0, factor * tx_max, round(particles)))
    xp_t = np.asarray(np.random.normal(0, factor * txp_max, round(particles)))
    y_t = np.asarray(np.random.normal(0, factor * ty_max, round(particles)))
    yp_t = np.asarray(np.random.normal(0, factor * typ_max, round(particles)))

    # Rotating the transverse distribution
    # --------------------------------------------------------------------
    angle_x = np.arctan(-alpha_x / beta_x)
    x = x_t * np.cos(angle_x) - xp_t * np.sin(angle_x)
    xp = x_t * np.sin(angle_x) + xp_t * np.cos(angle_x)
    angle_y = np.arctan(-alpha_y / beta_y)
    y = y_t * np.cos(angle_y) - yp_t * np.sin(angle_y)
    yp = y_t * np.sin(angle_y) + yp_t * np.cos(angle_y)

    # Generating the longitudinal distribution
    # --------------------------------------------------------------------
    z = []
    E = []
    dp = []
    while len(z) < particles:  # Generate for as long as is needed
        particle_z = random.gauss(0, 1)
        particle_e = random.gauss(0, 1)
        trial_z = particle_z * bunch
        trial_e = energy * (1 + particle_e * spread)  # eV
        trial_p = np.sqrt((trial_e - mass) * (trial_e + mass))
        dPP = (trial_p - p0) / p0
        # Longitudinal contour
        h = get_bucket(machine, plot=False, z=trial_z, DELTA=dPP)
        if machine in ('HL_coll', 'HL_coll_200', 'HL_coll_tcp', 'HL_coll_tcp_200'):
            Hmargin = -0.01
        elif machine == 'SPS_inj':
            Hmargin = -1
        else:
            raise ValueError('Unknown machine: %s' % machine)
        if h <= Hmargin:
            z.append(float(trial_z))
            E.append(float(trial_e))
            dp.append(dPP)
        else:
            print('Outside margin, trying again,', h)
    zz = np.asarray(z)
    EE = np.asarray(E)
    ddp = np.asarray(dp)

    if fort13 == 'False':
        outfile = 'init_dist_' + job_str + '.txt'
        with open(outfile, 'w') as f:
            for e1, e2, e3, e4, e5, e6 in zip(x, xp, y, yp, zz * 1e3, EE * 1e-6):
                f.write('%8.6e %8.6e %8.6e %8.6e %8.6e %8.6e\n'
                        % (e1, e2, e3, e4, e5, e6))
    elif fort13 == 'True':
        # fort.13 stores particle pairs: six coordinates per particle,
        # then the reference energy and both particle energies.
        outfile = 'fort.13'
        with open(outfile, 'w') as f:
            for i in range(0, particles, 2):
                f.write(str((x[i] + offset_x) * 1e3) + "\n")        # mm
                f.write(str((xp[i] + offset_xp) * 1e3) + "\n")      # mrad
                f.write(str((y[i] + offset_y) * 1e3) + "\n")        # mm
                f.write(str((yp[i] + offset_yp) * 1e3) + "\n")      # mrad
                f.write(str(zz[i] * 1e3) + "\n")                    # mm
                f.write(str(ddp[i]) + "\n")                         # -
                f.write(str((x[i + 1] + offset_x) * 1e3) + "\n")    # mm
                f.write(str((xp[i + 1] + offset_xp) * 1e3) + "\n")  # mrad
                f.write(str((y[i + 1] + offset_y) * 1e3) + "\n")    # mm
                f.write(str((yp[i + 1] + offset_yp) * 1e3) + "\n")  # mrad
                f.write(str(zz[i + 1] * 1e3) + "\n")                # mm
                f.write(str(ddp[i + 1]) + "\n")                     # -
                f.write(str(energy * 1e-6) + "\n")                  # MeV
                f.write(str(EE[i] * 1e-6) + "\n")                   # MeV
                f.write(str(EE[i + 1] * 1e-6) + "\n")               # MeV
    else:
        print('Please input True or False in the fourth argument')
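# An illustrative call of dist_generator; every value below is a placeholder,
# not taken from the source. This would generate 1000 particles at 7 TeV for
# the HL_coll machine and write init_dist_1.txt (plus seed.txt) to the
# current directory, rather than a fort.13 file.
dist_generator(particles=1000, energy=7e12, machine='HL_coll', fort13='False',
               jobs=1, factor=1.0,
               emittance_x=2.5e-6, emittance_y=2.5e-6,
               alpha_x=0.0, alpha_y=0.0, beta_x=150.0, beta_y=150.0,
               offset_x=0.0, offset_xp=0.0, offset_y=0.0, offset_yp=0.0,
               dispersion_x=0.0, dispersion_y=0.0,
               bunch=0.075, spread=1.1e-4, seed=0)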
def smart_delete(url):
    result = urllib.parse.urlparse(url)
    if result.scheme in ['', 'file']:
        return os.remove(url)
    elif result.scheme == 's3':
        get_s3client_for_bucket(url).delete_object(
            Bucket=get_bucket(result), Key=get_key(result))
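# Taken together, the smart_* helpers form a scheme-agnostic file lifecycle:
# size/etag inspection, move, and delete all dispatch on the URL scheme.
# A round-trip sketch with illustrative paths:
smart_move('/tmp/batch.json', 's3://my-bucket/incoming/batch.json')
print(smart_etag('s3://my-bucket/incoming/batch.json'))
smart_delete('s3://my-bucket/incoming/batch.json')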