def schiffts():
    # some initialization
    old_data = {}
    data_queue = []
    current_data = None
    next_hit = {}
    last_update = ''
    intensity = 0
    temperature_data = {'status': 0}
    storage = DataStorage(settings.COLLECTOR_DATA_FILE)

    # get date; the radar has an ~8 minute delay, so go 10 minutes back in time
    now = datetime.now()
    latest_radar = now - timedelta(0, 10 * 60)
    timestamp = build_timestamp(latest_radar)
    if settings.DEBUG:
        print("current timestamp: %s" % timestamp)

    old_rain, old_last_rain, old_last_dry, old_snow, old_data_queue, old_location_weather = storage.load_data()

    # get data from srf.ch up to now
    for minutes in range(0, settings.NO_SAMPLES + 3):
        timestamp = build_timestamp(latest_radar - timedelta(0, 60 * 5 * minutes))
        # try to retrieve a measurement for the timestamp from the old data queue
        old_measurement = next((item for item in old_data_queue if item.timestamp == timestamp), None)
        # get a new measurement from srf.ch if it wasn't found in the old data queue
        if not old_measurement:
            try:
                measurement = Measurement((settings.X_LOCATION, settings.Y_LOCATION), timestamp, 3, 105)
                measurement.analyze_image()
                data_queue.append(measurement)
                if settings.DEBUG:
                    print("add sample with timestamp %s" % timestamp)
                if minutes == 0:
                    current_data = measurement
                    last_update = timestamp
            except Exception as e:
                print("fail in queuefiller: %s" % e)
        # use old data
        else:
            if settings.DEBUG:
                print("%s already in queue" % timestamp)
            if minutes == 0:
                current_data = old_measurement
                last_update = timestamp
            data_queue.append(old_measurement)
        if len(data_queue) == settings.NO_SAMPLES:
            break
def readMotorDump(fnameOrFolder, asDataStorage=True,
                  default_fname="motor_position_after_data_collection.txt"):
    """ Read waxscollect-style motor dump
        if fnameOrFolder is a folder, default_fname is read
        if asDataStorage is False:
            return recArray with fields name, user, dial
        else:
            return dictionary-like object (each motor is a key)
    """
    if os.path.isfile(fnameOrFolder):
        fname = fnameOrFolder
    else:
        fname = os.path.join(fnameOrFolder, default_fname)
    data = np.genfromtxt(fname, names=True, dtype=("<U15", float, float))
    # remove interleaved headers
    idx_to_remove = data['name'] == 'name'
    data = data[~idx_to_remove]
    # for i in range(data.shape[0]): data['name'][i] = data['name'][i].decode('ascii')
    if asDataStorage:
        motor_pos = collections.namedtuple('motor_pos', ['user', 'dial'])
        ret = dict()
        for imotor, motor in enumerate(data['name']):
            ret[motor] = motor_pos(dial=data['dial'][imotor], user=data['user'][imotor])
        data = DataStorage(ret)
    return data
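# A minimal usage sketch of readMotorDump (the run folder name is a
# hypothetical example):
motors = readMotorDump("run0001/")      # reads default_fname inside the folder
print(list(motors.keys()))              # one key per motor
# each value is a namedtuple with .user and .dial positions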
def average(fileOrFolder, delays=slice(None), scale=1, norm=None,
            returnAll=False, plot=False, showTrend=False):
    data = DataStorage(fileOrFolder)
    if isinstance(delays, slice):
        idx = np.arange(data.delays.shape[0])[delays]
    elif isinstance(delays, (int, float)):
        idx = data.delays == float(delays)
    else:
        idx = data.delays < 0
    if idx.sum() == 0:
        print("No data with the current filter")
        return None
    i = data.data[idx]
    q = data.q
    if isinstance(norm, (tuple, list)):
        idx = (q > norm[0]) & (q < norm[1])
        norm = np.nanmean(i[:, idx], axis=1)
        i = i / norm[:, np.newaxis]
    elif isinstance(norm, np.ndarray):
        # 'elif' avoids normalizing twice when norm was given as a q-range
        i = i / norm[:, np.newaxis]
    title = "%s %s" % (fileOrFolder, str(delays))
    utils.plotdata(q, i * scale, showTrend=showTrend, plot=plot, title=title)
    if returnAll:
        return q, i.mean(axis=0) * scale, i
    else:
        return q, i.mean(axis=0) * scale
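# A minimal usage sketch of average(); the file name and normalization q-range
# are hypothetical examples:
q, avg = average("sample1/diffs.h5", norm=(0.9, 1.1), plot=False)
q, avg, all_curves = average("sample1/diffs.h5", returnAll=True)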
def readLogFile(fname, skip_first=0, last=None, converters=None,
                output="datastorage"):
    """ read generic log file efficiently
        lines starting with "#" will be skipped
        the last line starting with "#" will be used to find the keys
        converters is used to convert a certain field (see np.genfromtxt)
        output is a string; if 'datastorage' or 'dict' the data is converted,
        else it is left as a recarray
    """
    # make 'output' case insensitive
    if isinstance(output, str):
        output = output.lower()

    with open(fname, "r") as f:
        lines = f.readlines()
    lines = [line.strip() for line in lines]

    # find the first line not starting with "#"
    for iline, line in enumerate(lines):
        if line.lstrip()[0] != "#":
            break

    # extract names (numpy could do it but gets confused by spaces in the header)
    names = lines[iline - 1][1:].split()

    data = np.genfromtxt(fname, skip_header=iline, names=names, dtype=None,
                         converters=converters, excludelist=[])

    # skip firsts/lasts
    data = data[skip_first:last]

    # force previously found names, numpy changes file to file_
    names = [name.strip("_") for name in data.dtype.names]
    data.dtype.names = names

    # convert to string the columns that can be
    dtype = data.dtype.descr
    newtype = []
    for (name, type_str) in dtype:
        name = name.strip("_")  # numpy changes file to file_
        type_str = type_str.replace("|S", "<U")
        newtype.append((name, type_str))
    data = data.astype(newtype)

    if output == "dict":
        # convert to dict
        data = dict((name, data[name]) for name in data.dtype.names)
    elif output == "datastorage":
        data = dict((name, data[name]) for name in data.dtype.names)
        data = DataStorage(data)
    return data
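# A minimal usage sketch of the generic log reader (the log file name is a
# hypothetical example):
log = readLogFile("diagnostics.log", skip_first=1, output="datastorage")
print(list(log.keys()))    # column names taken from the last '#' header line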
def load_available_storages(self):
    try:
        result = self.execute_request(str(self.api_url) + self.LOAD_AVAILABLE_STORAGES)
        if result is None:
            return []
        return [DataStorage.from_json(item) for item in result]
    except Exception as e:
        raise RuntimeError("Failed to load storages with READ and WRITE permissions. "
                           "Error message: {}".format(str(e)))
def save(ds: datastorage.DataStorage, args):
    settings_str = ds.get_preferences()
    db_settings = json.loads(settings_str) if settings_str is not None else {}

    font_size = args.get('font_size', None, type=int)
    if font_size:
        if not 5 <= font_size <= 32:
            raise AttributeError('Font size is out of range [5, 32]')
        db_settings['font_size'] = font_size

    theme = args.get('theme', None, type=str)
    if theme:
        if theme not in (Settings.THEME_LIGHT, Settings.THEME_DARK, Settings.THEME_SEPIA):
            raise AttributeError('Unknown theme: ' + theme)
        db_settings['theme'] = theme

    settings_str = json.dumps(db_settings)
    ds.set_preferences(settings_str)
def load(ds: datastorage.DataStorage):
    settings_str = ds.get_preferences()
    db_settings = json.loads(settings_str) if settings_str is not None else {}
    def_settings = {
        'font_size': Settings.DEF_FONT_SIZE,
        'theme': Settings.DEF_THEME
    }
    overall_settings = {**def_settings, **db_settings}
    return overall_settings
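# A hedged usage sketch for the two functions above. It assumes `args` behaves
# like Flask's request.args (a werkzeug MultiDict, whose .get() accepts a
# `type` callable); the database path is a hypothetical example, and if these
# functions live inside the Settings class they would be called as
# Settings.save / Settings.load instead.
from werkzeug.datastructures import MultiDict

ds = datastorage.DataStorage("kreader.db")
ds.create_db()
save(ds, MultiDict([('font_size', '14'), ('theme', Settings.THEME_DARK)]))
print(load(ds))    # -> {'font_size': 14, 'theme': Settings.THEME_DARK}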
def leastsq_circle(x, y):
    """ Utility function to fit a circle given x,y positions of points """
    # coordinates of the barycenter
    center_estimate = np.nanmean(x), np.nanmean(y)
    center, ier = optimize.leastsq(_chi2, center_estimate, args=(x, y))
    xc, yc = center
    Ri = _calc_R(x, y, *center)
    R = Ri.mean()
    residu = np.sum((Ri - R)**2)
    return DataStorage(center=np.asarray((xc, yc)), radius=R)
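# A quick self-contained check of leastsq_circle on synthetic data (center and
# radius below are arbitrary test values):
theta = np.linspace(0, 2 * np.pi, 50)
x = 120.0 + 35.0 * np.cos(theta) + np.random.normal(scale=0.5, size=theta.size)
y = 80.0 + 35.0 * np.sin(theta) + np.random.normal(scale=0.5, size=theta.size)
fit = leastsq_circle(x, y)
print(fit.center, fit.radius)    # should be close to (120, 80) and 35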
def doFolder_dataRed(azavStorage, funcForAveraging=np.nanmean,
                     outStorageFile='auto', reference='min', chi2_0_max='auto',
                     saveTxt=True, first=None, last=None):
    """ azavStorage is a DataStorage instance or the filename to read """

    if isinstance(azavStorage, DataStorage):
        azav = azavStorage
        folder = azavStorage.folder
    elif os.path.isfile(azavStorage):
        folder = os.path.dirname(azavStorage)
        azav = DataStorage(azavStorage)
    else:
        # assume it is just a folder name
        folder = azavStorage
        azavStorage = folder + "/pyfai_1d" + default_extension
        azav = DataStorage(azavStorage)

    if last is not None or first is not None:
        idx = slice(first, last)
        azav.log.delay = azav.log.delay[idx]
        azav.data_norm = azav.data_norm[idx]
        azav.err_norm = azav.err_norm[idx]

    # calculate differences
    tr = dataReduction.calcTimeResolvedSignal(
        azav.log.delay,
        azav.data_norm,
        err=azav.err_norm,
        q=azav.q,
        reference=reference,
        funcForAveraging=funcForAveraging,
        chi2_0_max=chi2_0_max)

    tr.folder = folder
    tr.twotheta_rad = azav.twotheta_rad
    tr.twotheta_deg = azav.twotheta_deg
    tr.info = azav.pyfai_info

    if outStorageFile == 'auto':
        if not os.path.isdir(folder):
            folder = "./"
        outStorageFile = folder + "/diffs" + default_extension
    tr.filename = outStorageFile

    # save txt and npz file
    if saveTxt:
        dataReduction.saveTxt(folder, tr, info=azav.pyfai_info)

    tr.save(outStorageFile)

    return tr
def find_center_using_clicks(img, X=None, Y=None, clim='auto'):
    """ Find beam center position fitting points (selected by clicks) on a ring

    Parameters
    ==========

    img: array or string
      image to use, if string, reads it with fabio

    X,Y: None or arrays
      position of center of pixels, if given they will have to have
      same shape as img, if None, they will be created with meshgrid

    clim: tuple|'auto'
      for color scale
    """

    # interpret inputs
    img = _prepare_img(img)

    if clim == 'auto':
        clim = np.nanpercentile(img.ravel(), (90, 100))

    shape = img.shape
    if X is None or Y is None:
        X, Y = np.meshgrid(range(shape[1]), range(shape[0]))
    ans = 'ok'
    while (ans != 'done'):
        ax = plt.gca()
        ax.pcolormesh(X, Y, img, cmap=plt.cm.gray, vmin=clim[0], vmax=clim[1])
        print("Select points on a ring, middle-click to stop")
        coords = plt.ginput(-1)
        coords = np.asarray(coords).T
        # leastsq_circle returns a DataStorage (see above), unpack it explicitly
        fit = leastsq_circle(coords[0], coords[1])
        xc, yc = fit.center
        R = fit.radius
        circle = plt.Circle((xc, yc), radius=R, lw=5, color='green', fill=False)
        ax.add_artist(circle)
        print("Found circle at (%.4f,%.4f), R = %.4f" % (xc, yc, R))
        ax.plot(xc, yc, "o", color="green", markersize=5)
        plt.draw()
        ans = input("type 'done' to finish, anything else to try again")
    return DataStorage(xc=xc, yc=yc, R=R)
def fit_ellipse(x, y):
    """ Utility function to fit an ellipse given x,y positions of points """
    # from http://nicky.vanforeest.com/misc/fitEllipse/fitEllipse.html
    x = x[:, np.newaxis]
    y = y[:, np.newaxis]
    D = np.hstack((x * x, x * y, y * y, x, y, np.ones_like(x)))
    S = np.dot(D.T, D)
    C = np.zeros([6, 6])
    C[0, 2] = C[2, 0] = 2
    C[1, 1] = -1
    E, V = np.linalg.eig(np.dot(np.linalg.inv(S), C))
    n = np.argmax(np.abs(E))
    A = V[:, n]

    # conic coefficients
    b, c, d, f, g, a = A[1] / 2, A[2], A[3] / 2, A[4] / 2, A[5], A[0]

    # center
    num = b * b - a * c
    x0 = (c * d - b * f) / num
    y0 = (a * f - b * d) / num
    center = np.array([x0, y0])

    # angle of rotation
    angle = np.rad2deg(0.5 * np.arctan(2 * b / (a - c)))

    # axis
    up = 2 * (a * f * f + c * d * d + g * b * b - 2 * b * d * f - a * c * g)
    down1 = (b * b - a * c) * ((c - a) * np.sqrt(1 + 4 * b * b / ((a - c) * (a - c))) - (c + a))
    down2 = (b * b - a * c) * ((a - c) * np.sqrt(1 + 4 * b * b / ((a - c) * (a - c))) - (c + a))
    res1 = np.sqrt(up / down1)
    res2 = np.sqrt(up / down2)
    axis = np.array([res1, res2])

    return DataStorage(center=center, axis=axis, angle=angle,
                       radius=np.mean(axis))
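# A quick self-contained check of fit_ellipse on synthetic data (center, axes
# and tilt below are arbitrary test values):
t = np.linspace(0, 2 * np.pi, 200)
tilt = np.deg2rad(20)
x = 200.0 + 50.0 * np.cos(t) * np.cos(tilt) - 30.0 * np.sin(t) * np.sin(tilt)
y = 150.0 + 50.0 * np.cos(t) * np.sin(tilt) + 30.0 * np.sin(t) * np.cos(tilt)
fit = fit_ellipse(x, y)
print(fit.center, fit.axis, fit.angle)    # roughly (200, 150), (50, 30), ~20 deg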
def calc_best_transmission(self, E, requested_tramission, verbose=False,
                           use_progressive=False):
    """ E must be a float, can't be a vector """
    E = float(E)
    if use_progressive:
        t = self._calc_all_transmissions_progressive(E)
        best = np.argmin(np.abs(t - requested_tramission))
        best_combination = self._att_progressive[best]
    else:
        t = self._calc_all_transmissions(E)
        best = np.argmin(np.abs(t - requested_tramission))
        best_combination = self._att[best]
    t_1E = t[best]
    t_2E = self.calc_transmission(2 * E, best_combination)
    t_3E = self.calc_transmission(3 * E, best_combination)
    if verbose:
        print(f"Finding set for T={requested_tramission:.3g} @ {E:.3f} keV")
        print(f"best set is {best_combination}:")
        print(f"  {self._show_combination(best_combination)}")
        print(f"transmission @ E  is {float(t[best]):.3g} "
              f"(asked {requested_tramission:.3g})")
        print(f"transmission @ 2E is {t_2E:.3g}")
        print(f"transmission @ 3E is {t_3E:.3g}")
    return DataStorage(
        bestset=best_combination,
        transmission=t_1E,
        energy=E,
        transmission_requested=requested_tramission,
        t1E=t_1E,
        t2E=t_2E,
        t3E=t_3E,
    )
def readLogFile(fnameOrFolder, subtractDark=False, skip_first=0,
                asDataStorage=True, last=None, srcur_min=30):
    """ read id9 style logfile; the last comment line before the data is used
        for the keys; only rows with srcur > srcur_min will be kept.
        subtractDark is not needed for data collected with waxscollect
    """
    if os.path.isdir(fnameOrFolder):
        fname = findLogFile(fnameOrFolder)
    else:
        fname = fnameOrFolder
    log.info("Reading id9 logfile: %s" % fname)

    data = utils.files.readLogFile(fname, skip_first=skip_first, last=last,
                                   output="array",
                                   converters=dict(delay=_delayToNum))

    # work on darks if needed
    if subtractDark:
        ## find darks
        darks = {}
        with open(fname, "r") as f:
            lines = f.readlines()
        lines = [line.strip() for line in lines]
        # look only for comment lines
        lines = [line for line in lines if line[0] == "#"]
        for line in lines:
            if line.find("pd1 dark/sec") >= 0:
                darks['pd1ic'] = _findDark(line)
            if line.find("pd2 dark/sec") >= 0:
                darks['pd2ic'] = _findDark(line)
            if line.find("pd3 dark/sec") >= 0:
                darks['pd3ic'] = _findDark(line)
        ## subtract darks
        for diode in ['pd1ic', 'pd2ic', 'pd3ic', 'pd4ic']:
            if diode in darks:
                data[diode] = data[diode] - darks[diode] * data['timeic']

    # srcur filter
    if "currentmA" in data.dtype.names:
        idx_cur = data['currentmA'] > srcur_min
        if (idx_cur.sum() < idx_cur.shape[0] * 0.5):
            log.warn("Minimum srcur filter has kept only %.1f%%" %
                     (idx_cur.sum() / idx_cur.shape[0] * 100))
            log.warn("Minimum srcur: %.2f, median(srcur): %.2f" %
                     (srcur_min, np.nanmedian(data["currentmA"])))
        data = data[idx_cur]
    else:
        log.warn("Could not find currentmA in logfile, skipping filtering")

    info = DataStorage()
    # usually folders are named sample/run
    if os.path.isdir(fnameOrFolder):
        folder = fnameOrFolder
    else:
        folder = os.path.dirname(fnameOrFolder)
    dirs = folder.split(os.path.sep)
    ylabel = ".".join(dirs[-2:])
    info.name = ".".join(dirs[-2:])
    try:
        reprate = readReprate(fname)
        info.reprate = reprate
        ylabel += " %.2f Hz" % reprate
    except Exception:
        print("Could not read rep rate info")
    try:
        time_info = timesToInfo(data['time'])
        info.duration = time_info
        ylabel += "\n" + time_info
    except Exception:
        print("Could not read time duration info")
    info.ylabel = ylabel

    if asDataStorage:
        data = DataStorage(
            dict((name, data[name]) for name in data.dtype.names))

    return data, info
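# A minimal usage sketch of the id9 logfile reader (the run folder is a
# hypothetical example):
log_data, log_info = readLogFile("sample1/run3/")
print(log_info.name, log_info.ylabel)
print(list(log_data.keys()))    # one entry per logfile column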
def doFolder_dataRed(azavStorage, funcForAveraging=np.nanmean,
                     outStorageFile='auto', reference='min', chi2_0_max='auto',
                     saveTxt=True, first=None, last=None, idx=None,
                     split_angle=False):
    """ azavStorage is a DataStorage instance or the filename to read """

    if isinstance(azavStorage, DataStorage):
        azav = azavStorage
        folder = azavStorage.folder
    elif os.path.isfile(azavStorage):
        folder = os.path.dirname(azavStorage)
        azav = DataStorage(azavStorage)
    else:
        # assume it is just a folder name
        folder = azavStorage
        azavStorage = folder + "/pyfai_1d" + default_extension
        azav = DataStorage(azavStorage)

    if split_angle:
        angles = np.unique(azav.log.angle)
        diffs = []
        for angle in angles:
            idx = azav.log.angle == angle
            diffs.append(
                doFolder_dataRed(azav,
                                 funcForAveraging=funcForAveraging,
                                 outStorageFile=None,
                                 reference=reference,
                                 chi2_0_max=chi2_0_max,
                                 saveTxt=False,
                                 idx=idx,
                                 split_angle=False))
        ret = DataStorage(angles=angles, diffs=diffs)
        if outStorageFile == 'auto':
            if not os.path.isdir(folder):
                folder = "./"
            outStorageFile = folder + "/diffs" + default_extension
        if outStorageFile is not None:
            ret.save(outStorageFile)
        return ret

    azav = copy.deepcopy(azav)

    if (last is not None or first is not None) and idx is None:
        idx = slice(first, last)

    if idx is not None:
        azav.log.delay = azav.log.delay[idx]
        azav.data_norm = azav.data_norm[idx]
        azav.err_norm = azav.err_norm[idx]

    # laser off is saved as -10s; when using the automatic "min", prevent the
    # off images from being used as reference (use reference=-10 if that is
    # what you want)
    if reference == "min":
        reference = azav.log.delay[azav.log.delay != -10].min()

    # calculate differences
    tr = dataReduction.calcTimeResolvedSignal(
        azav.log.delay,
        azav.data_norm,
        err=azav.err_norm,
        q=azav.q,
        reference=reference,
        funcForAveraging=funcForAveraging,
        chi2_0_max=chi2_0_max)

    tr.folder = folder
    tr.twotheta_rad = azav.twotheta_rad
    tr.twotheta_deg = azav.twotheta_deg
    tr.info = azav.pyfai_info

    if outStorageFile == 'auto':
        if not os.path.isdir(folder):
            folder = "./"
        outStorageFile = folder + "/diffs" + default_extension
    tr.filename = outStorageFile

    # save txt and npz file
    if saveTxt:
        dataReduction.saveTxt(folder, tr, info=azav.pyfai_info)

    if outStorageFile is not None:
        tr.save(outStorageFile)

    return tr
def doFolder(folder="./", files='*.edf*', nQ=1500, force=False, mask=None, dark=10, qlims=None, monitor='auto', save_pyfai=False, saveChi=True, poni='pyfai.poni', storageFile='auto', save=True, logDict=None, dezinger=None, skip_first=0, last=None, azimuth_range=None): """ calculate 1D curves from files in folder Parameters ---------- folder : str folder to work on files : str regular expression to look for ccd images (use edf* for including gzipped giles) nQ : int number of Q-points (equispaced) monitor : array or (qmin,qmax) or None normalization array (or list for q range normalization) force : True|False if True, redo from beginning even if previous data are found if False, do only new files mask : can be a list of [filenames|array of booleans|mask string] pixels that are True are dis-regarded saveChi : True|False if False, chi files (text based for each image) are not saved dezinger : None or 0<float<100 use pyfai function 'separate' to remove zingers. The value is the percentile used to find the liquicd baseline, 50 (i.e. median value) if a good approximation. Dezinger takes ~200ms per 4M pixel image. Needs good center and mask logDict : None or dictionary(-like) each key is a field. if given it has to have 'file' key poni : informationation necessary to build an AzimuthalIntegrator: → an AzimuthalIntegrator instance → a filename that will be look for in 1 'folder' first 2 in ../folder 3 in ../../folder .... n-1 in pwd n in homefolder → a dictionary (use to bootstrap an AzimuthalIntegrator using AzimuthalIntegrator(**poni) save_pyfai : True|False if True, it stores all pyfai's internal arrays (~110 MB) skip_first : int skip the first images (the first one is sometime not ideal) last : int skip evey image after 'last' """ func = inspect.currentframe() args = inspect.getargvalues(func) files_reg = files # store argument for saving .. args = dict([(arg, args.locals[arg]) for arg in args.args]) folder = folder.replace("//", "/").rstrip("/") # can't store aritrary objects if isinstance(args['poni'], pyFAI.azimuthalIntegrator.AzimuthalIntegrator): args['poni'] = ai_as_dict(args['poni']) if storageFile == 'auto': fname = "pyfai_1d" + g_default_extension if not os.path.isdir(folder): # do not overide folder, it might be useful storageFile = os.path.join(".", fname) else: storageFile = os.path.join(folder, fname) if os.path.isfile(storageFile) and not force: saved = DataStorage(storageFile) log.info("Found %d images in storage file" % saved.data.shape[0]) ai = getAI(poni, folder) # consistency check (saved images done with same parameters ?) 
if ai is not None: # pyfai cannot be compared (except for its string representation) # because before first image some fields are None keys_to_compare = "nQ mask dark dezinger skip_first last" keys_to_compare = keys_to_compare.split() # recursively transform in plain dict and limit comparison to given keys saved_args = DataStorage(saved.args).toDict() now_args = DataStorage(args).toDict() saved_args = dict([(k, saved_args[k]) for k in keys_to_compare]) now_args = dict([(k, now_args[k]) for k in keys_to_compare]) if (not compare_pyfai(saved.pyfai,ai)) or \ np.any( saved.mask != interpretMasks(mask,saved.mask.shape)) or \ not utils.is_same(saved_args,now_args) : log.warn( "Found inconsistency between curves already saved and new ones" ) log.warn("Redoing saved ones with new parameters") if (saved.pyfai_info != ai_as_str(ai)): log.warn("pyfai parameters changed from:\n%s" % saved.pyfai_info + "\nto:\n%s" % ai_as_str(ai)) if np.any( saved.mask != interpretMasks(mask, saved.mask.shape)): log.warn("Mask changed from:\n%s" % saved.mask + "\nto:\n%s" % interpretMasks(mask, saved.mask.shape)) if not utils.is_same(saved_args, now_args): for k in set(now_args.keys()) - set(['mask']): if not utils.is_same(saved_args[k], now_args[k]): if isinstance(saved_args[k], dict): for kk in saved_args[k].keys(): if not utils.is_same( saved_args[k][kk], now_args[k][kk]): log.warn( "Parameter %s.%s" % (k, kk) + "IS DIFFERENT", saved_args[k][kk], now_args[k][kk]) else: log_str = " %s to %s" % (saved_args[k], now_args[k]) if len(log_str) > 20: log_str = ":\n%s\nto:\n%s" % ( saved_args[k], now_args[k]) log.warn("Parameter '%s' changed from" % k + log_str) args['force'] = True saved = doFolder(**args) else: saved = None files = utils.getFiles(folder, files) if logDict is not None: files = [f for f in files if utils.getBasename(f) in logDict['file']] # sometime one deletes images but not corresponding lines in logfiles... 
if len(files) < len(logDict['file']): basenames = np.asarray([utils.getBasename(file) for file in files]) idx_to_keep = np.asarray([f in basenames for f in logDict['file']]) for key in logDict.keys(): logDict[key] = logDict[key][idx_to_keep] log.warn( "More files in log than actual images, truncating loginfo") files = files[skip_first:last] if saved is not None: files = [ f for f in files if utils.getBasename(f) not in saved["files"] ] log.info("Will do azimuthal integration for %d files" % (len(files))) files = np.asarray(files) basenames = np.asarray([utils.getBasename(file) for file in files]) if len(files) > 0: # which poni file to use: ai = getAI(poni, folder) _msg = "could not interpret poni info or find poni file" if ai is None: raise ValueError(_msg) shape = read(files[0]).shape mask = interpretMasks(mask, shape) data = np.empty((len(files), nQ)) err = np.empty((len(files), nQ)) pbar = utils.progressBar(len(files)) for ifname, fname in enumerate(files): img = read(fname) q, i, e = do1d(ai, img, mask=mask, npt_radial=nQ, dark=dark, dezinger=dezinger, azimuth_range=azimuth_range) data[ifname] = i err[ifname] = e if saveChi: chi_fname = utils.removeExt(fname) + ".chi" utils.saveTxt(chi_fname, q, np.vstack((i, e)), info=ai_as_str(ai), overwrite=True) pbar.update(ifname + 1) pbar.finish() if saved is not None: files = np.concatenate((saved.orig.files, basenames)) data = np.concatenate((saved.orig.data, data)) err = np.concatenate((saved.orig.err, err)) else: files = basenames twotheta_rad = utils.qToTwoTheta(q, wavelength=ai.wavelength * 1e10) twotheta_deg = utils.qToTwoTheta(q, wavelength=ai.wavelength * 1e10, asDeg=True) orig = dict(data=data.copy(), err=err.copy(), q=q.copy(), twotheta_deg=twotheta_deg, twotheta_rad=twotheta_rad, files=files) ret = dict(folder=folder, files=files, orig=orig, pyfai=ai_as_dict(ai), pyfai_info=ai_as_str(ai), mask=mask, args=args) if not save_pyfai: ret['pyfai']['chia'] = None ret['pyfai']['dssa'] = None ret['pyfai']['qa'] = None ret['pyfai']['ttha'] = None ret = DataStorage(ret) else: ret = saved if ret is None: return None if qlims is not None: idx = (ret.orig.q >= qlims[0]) & (ret.orig.q <= qlims[1]) else: idx = np.ones_like(ret.orig.q, dtype=bool) ret.orig.twotheta_deg = utils.qToTwoTheta(ret.orig.q, wavelength=ai.wavelength * 1e10, asDeg=True) ret.orig.twotheta_rad = utils.qToTwoTheta(ret.orig.q, wavelength=ai.wavelength * 1e10) ret.data = ret.orig.data[:, idx] ret.err = ret.orig.err[:, idx] ret.q = ret.orig.q[idx] ret.twotheta_rad = ret.orig.twotheta_rad[idx] ret.twotheta_deg = ret.orig.twotheta_deg[idx] if isinstance(monitor, str): if monitor == 'auto': monitor = ret.data.mean(1) else: raise ValueError( "'monitor' must be ndarray, 2-D tuple/list, 'auto' or None.") elif isinstance(monitor, (tuple, list)): if len(monitor) == 2: idx_norm = (ret.q >= monitor[0]) & (ret.q <= monitor[1]) monitor = ret.data[:, idx_norm].mean(1) else: raise ValueError( "'monitor' must be ndarray, 2-D tuple/list, 'auto' or None.") elif not isinstance(monitor, np.ndarray) and monitor is not None: raise ValueError( "'monitor' must be ndarray, 2-D tuple/list, 'auto' or None.") if monitor is not None: ret["data_norm"] = ret.data / monitor[:, np.newaxis] ret["err_norm"] = ret.err / monitor[:, np.newaxis] ret["monitor"] = monitor[:, np.newaxis] else: ret["data_norm"] = None ret["err_norm"] = None ret["monitor"] = None # add info from logDict if provided if logDict is not None: ret['log'] = logDict # sometime saving is not necessary (if one has to do it after subtracting 
    # background)
    if storageFile is not None and save:
        ret.save(storageFile)

    return ret
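# A hedged end-to-end sketch of how the functions above are typically chained:
# azimuthally average the images in a run folder, then compute time-resolved
# differences. The folder name, q-limits and monitor range are hypothetical
# examples, and the logfile is assumed to contain a 'file' column so that
# images and log entries can be matched.
log_data, log_info = readLogFile("sample1/run3/")            # id9-style logfile
azav = doFolder("sample1/run3/", qlims=(0.5, 3.0),
                monitor=(0.6, 0.8), logDict=log_data)        # 1D azimuthal averages
diffs = doFolder_dataRed(azav, reference='min')              # time-resolved differences
print(list(diffs.keys()))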
def create_db(self):
    path = self.get_db_path()
    ds = DataStorage(path)
    ds.create_db()
    return ds, path
class UnisonHandler(): """Starts, stops and monitors unison instances.""" # Object for data storage backend data_storage = None # configuration values config = {} # Enables extra output INFO = True # Logging Object # logging # self.config['unisonctrl_log_dir'] + os.sep + "unisonctrl.log" # self.config['unisonctrl_log_dir'] + os.sep + "unisonctrl.error" def __init__(self): """Prepare UnisonHandler to manage unison instances. Parameters ---------- none Returns ------- null Throws ------- none Doctests ------- """ self.import_config() # Set up configuration # Register exit handler atexit.register(self.exit_handler) # Set up logging self.logger = logging.getLogger('unisonctrl') self.logger.setLevel(logging.INFO) # Set up main log file logging logFileFormatter = logging.Formatter( fmt='[%(asctime)-s] %(levelname)-9s : %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p') # Size based log rotation if (self.config['rotate_logs'] == "size"): logfileHandler = logging.handlers.RotatingFileHandler( self.config['unisonctrl_log_dir'] + os.sep + 'unisonctrl.log', # maxBytes=50000000, # 50mb maxBytes=5000, # 50mb backupCount=20) # Timed log rotation elif (self.config['rotate_logs'] == "time"): logfileHandler = logging.handlers.TimedRotatingFileHandler( self.config['unisonctrl_log_dir'] + os.sep + 'unisonctrl.log', when="midnight", backupCount=14, # Keep past 14 days ) # No log rotation elif (self.config['rotate_logs'] == "off"): logfileHandler = logging.FileHandler() else: logfileHandler = logging.FileHandler() logfileHandler.setLevel(logging.DEBUG) logfileHandler.setFormatter(logFileFormatter) self.logger.addHandler(logfileHandler) # Send logs to console when running consoleFormatter = logging.Formatter( '[%(asctime)-22s] %(levelname)s : %(message)s') consoleHandler = logging.StreamHandler() consoleHandler.setLevel(logging.INFO) consoleHandler.setFormatter(consoleFormatter) self.logger.addHandler(consoleHandler) # Disabling debugging on the storage layer, it's no longer needed self.data_storage = DataStorage(False, self.config) self.logger.info("UnisonCTRL Starting") # Clean up dead processes to ensure data files are in an expected state self.cleanup_dead_processes() def run(self): """General wrapper to ensure running instances are up to date. Parameters ---------- none Returns ------- list PIDs of dead unison instances which we thought were running. Throws ------- none """ self.create_all_sync_instances() def create_all_sync_instances(self): """Create multiple sync instances from the config and filesystem info. Parameters ---------- none Returns ------- list PIDs of dead unison instances which we thought were running. 
Throws ------- none """ # Get directories to sync dirs_to_sync_by_sync_instance = self.get_dirs_to_sync( self.config['sync_hierarchy_rules']) # Store all known running sync instances here to potentially kill later # unhandled_sync_instances = copy.deepcopy(dirs_to_sync_by_sync_instance) unhandled_sync_instances = copy.deepcopy( self.data_storage.running_data) # Loop through each entry in the dict and create a sync instance for it for instance_name, dirs_to_sync in dirs_to_sync_by_sync_instance.items( ): # Mark this instance as handled so it's not killed later unhandled_sync_instances.pop(instance_name, None) # Make new sync instance self.create_sync_instance(instance_name, dirs_to_sync) # Kill any instances in unhandled_sync_instances, because they are # no longer required needed for inst_to_kill in unhandled_sync_instances: self.logger.debug("Cleaning up instance '" + inst_to_kill + "'" + " which is no longer needed.") self.kill_sync_instance_by_pid( self.data_storage.running_data[inst_to_kill]['pid']) def get_dirs_to_sync(self, sync_hierarchy_rules): """Start a new sync instance with provided details. # Parses the filesystem, and lists l Parameters ---------- Pass through sync_hierarchy_rules from config Returns ------- dict (nested) [syncname] - name of the sync name for this batch ['sync'] - directories to sync in this instance ['ignore'] - directories to ignore in this instance Throws ------- none """ # Contains the list of directories which have been handled by the loop # so future iterations don't duplicate work handled_dirs = [] # Contains list which is built up within the loop and returned at the # end of the method all_dirs_to_sync = {} self.logger.debug("Processing directories to sync. " + str(len(sync_hierarchy_rules)) + " rules to process.") for sync_instance in sync_hierarchy_rules: self.logger.debug("Instance '" + sync_instance['syncname'] + "' " + "Parsing rules and directories.") # Find full list expr = (self.config['unison_local_root'] + os.sep + sync_instance['dir_selector']) # Get full list of glob directories all_dirs_from_glob = glob.glob(self.sanatize_path(expr)) # Remove any dirs already handled in a previous loop, unless # overlap is set if ('overlap' not in sync_instance or sync_instance['overlap'] is False): self.logger.debug("Instance '" + sync_instance['syncname'] + "' " + "Removing already handled directories.") before = len(all_dirs_from_glob) all_unhandled_dirs_from_glob = [ x for x in all_dirs_from_glob if x not in handled_dirs ] after = len(all_unhandled_dirs_from_glob) # Log event if the duplication handler remove directories # Added 'False and' to disable this section. TMI in the logs if (before != after): self.logger.debug("Instance '" + sync_instance['syncname'] + "' " + "Parse result: " + str(before) + " dirs down to " + str(after) + " dirs by removing already handled dirs") # By default, use 'name_highfirst' if 'sort_method' not in sync_instance: sync_instance['sort_method'] = 'name_highfirst' # Apply sort if sync_instance['sort_method'] == 'name_highfirst': sorted_dirs = sorted(all_unhandled_dirs_from_glob, reverse=True) elif sync_instance['sort_method'] == 'name_lowfirst': sorted_dirs = sorted(all_unhandled_dirs_from_glob) # Add other sort implementations here later, if wanted else: # Message for exception and self.logger msg = ("'" + sync_instance['sort_method'] + "'" + " is not a valid sort method on sync instance " + "'" + sync_instance['syncname'] + "'. 
" + "Instance will not be created.") # Send message to self.logger self.logger.warn(msg) # Uncomment this to raise an exception instead of returning blank # raise ValueError(msg) # Return blank dir set, since sort was invalid return {} # Apply sort_count, if it's set if 'sort_count' in sync_instance: if (not isinstance(sync_instance['sort_count'], int)): # if not int, throw warning self.logger.warning( "Instance '" + sync_instance['syncname'] + "' " + "sort_count '" + str(sync_instance['sort_count']) + "'" + " is not castable to int. Setting sort_count to a " + "default of '3'.") # Then set a default sync_instance['sort_count'] = 3 else: # If it's a valid int, use it self.logger.debug("Instance '" + sync_instance['syncname'] + "' " + "sort_count set at " + str(sync_instance['sort_count']) + ".") dirs_to_sync = list( itertools.islice(sorted_dirs, 0, sync_instance['sort_count'], 1)) else: # if sort_count is not set, sync all dirs dirs_to_sync = sorted_dirs # Add all these directories to the handled_dirs so they aren't # duplicated later handled_dirs += dirs_to_sync # add dirs to final output nested dict if len(dirs_to_sync) > 0: all_dirs_to_sync[sync_instance['syncname']] = dirs_to_sync self.logger.debug("Instance '" + sync_instance['syncname'] + "' " + "Syncing " + str(len(dirs_to_sync)) + " directories.") # Shouldn't need this, except when in deep debugging # If you need it, turn it on if (False): dirstr = "\n ".join(dirs_to_sync) print(sync_instance['syncname'] + " directories :\n " + dirstr + "\n\n") self.logger.debug("Sync rule parsing complete. " + "Syncing " + str(len(handled_dirs)) + " explicit directories " + "in all instances combined") # Shouldn't need this, except when in deep debugging # If you need it, turn it on if (False): print("All directories synced :\n " + "\n ".join(handled_dirs)) return all_dirs_to_sync def create_sync_instance(self, instance_name, dirs_to_sync): """Start a new sync instance with provided details, if not already there. Parameters ---------- dict List of directories to sync with each instance. The key of the dict becomes the name of the sync instance. The value of the dict becomes the list of directories to sync with that instance. Returns ------- bool True if new instance was created False if no new instance was needed Throws ------- none """ # TODO: check global config hash here too, not just instance-specific config self.logger.debug("Processing instance '" + instance_name + "' , deciding whether" + "to kill or not") # Obtain a hash of the requested config to be able to later check if # the instance should be killed and restarted or not. # This hash will be stored with the instance data, and if it changes, # the instance will be killed and restarted so that new config can be # applied. 
config_hash = hashlib.sha256(( # Include the instance name in the config hash str(instance_name) + # Include the directories to sync in the config hash str(dirs_to_sync) + # Include the global config in the config hash str(self.config['global_unison_config_options'] )).encode('utf-8')).hexdigest() # Get data from requested instance, if there is any requested_instance = self.data_storage.get_data(instance_name) if requested_instance is None: # No instance data found, must start new one self.logger.info( "Instance '" + instance_name + "' " + "No instance data found, starting new sync instance.") elif requested_instance['config_hash'] == config_hash: # Existing instance data found, still uses same config - no restart self.logger.debug("Instance '" + instance_name + "' " + "Instance data found, config still unchanged.") return False else: # Existing instance data found, but uses different config, so restarting self.logger.info( "Instance '" + instance_name + "' " + "Instance data found, but config or directories to sync have" + " changed. Restarting instance.") self.kill_sync_instance_by_pid(requested_instance['pid']) self.data_storage.remove_data(requested_instance['syncname']) # Process dirs into a format for unison command line arguments dirs_for_unison = [] trimmed_dirs = [] amount_to_clip = (len(self.config['unison_local_root']) + 1) for dir in dirs_to_sync: # Clip off directory from local root dir_trimmed = dir[amount_to_clip:] # Format for unison command line args pathstr = "-path=" + dir_trimmed + "" # Append to list for args dirs_for_unison.append(pathstr) # Append to list for config storage trimmed_dirs.append(dir_trimmed) # Basic verification check (by no means complete) # Ensure local root exists if not os.path.isdir(self.config['unison_local_root']): raise IOError("Local root directory does not exist") # Convert SSH config info into connection string remote_path_connection_string = ( "" + "ssh://" + str(self.config['unison_remote_ssh_conn']) + "/" + str(self.config['unison_remote_root']) + "") # todo: add '-label' here # print(remote_path_connection_string) # Check if SSH config key is specified if self.config['unison_remote_ssh_keyfile'] == "": # Key is not specified, don't use it # TODO: reformat this entry self.logger.debug("SSH key not specified") else: # Key is specified # TODO: reformat this entry self.logger.debug("Key specified: " + self.config['unison_remote_ssh_keyfile']) remote_path_connection_string = ( remote_path_connection_string + " -sshargs='-i " + self.config['unison_remote_ssh_keyfile'] + "'") # print(remote_path_connection_string) # Set env vars to pass to unison envvars = { 'UNISONLOCALHOSTNAME': self.config['unison_local_hostname'], 'HOME': self.config['unison_home_dir'], 'USER': self.config['unison_user'], 'LOGNAME': self.config['unison_user'], 'PWD': self.config['unison_home_dir'], } logfile = self.config[ 'unison_log_dir'] + os.sep + instance_name + ".log" self.touch(logfile) # Start unison cmd = ([self.config['unison_path']] + ["" + str(self.config['unison_local_root']) + ""] + [remote_path_connection_string] + ["-label=unisonctrl-" + instance_name] + dirs_for_unison + self.config['global_unison_config_options'] + ["-log=true"] + ["-logfile=" + logfile]) # self.logger.info(" ".join(cmd)) running_instance_pid = subprocess.Popen( cmd, stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, # close_fds=True, env=envvars).pid instance_info = { "pid": running_instance_pid, "syncname": instance_name, "config_hash": config_hash, 
"dirs_to_sync": trimmed_dirs } self.logger.info("New instance '" + instance_name + "' " + " (PID " + str(instance_info['pid']) + ").") # Store instance info self.data_storage.set_data(instance_name, instance_info) # New instance was created, return true return True def touch(self, fname, mode=0o644, dir_fd=None, **kwargs): """Python equuivilent for unix "touch". Paramaters ------- str filename to touch Throws ------- none Returns ------- none Throws ------- none Doctests ------- """ flags = os.O_CREAT | os.O_APPEND with os.fdopen(os.open(fname, flags=flags, mode=mode, dir_fd=dir_fd)) as f: os.utime(f.fileno() if os.utime in os.supports_fd else fname, dir_fd=None if os.supports_fd else dir_fd, **kwargs) with open(fname, 'a'): os.utime(fname, None) def kill_sync_instance_by_pid(self, pid): """Kill unison instance by it's PID. Includes built-in protection for accidentally killing a non-unison program, and even other unison programs not started with this script. This ensures that this function will never kill a PID that we have not started with unisonctrl. Paramaters ------- int pid to kill - must be a PID started in this process Throws ------- none Returns ------- none Throws ------- none Doctests ------- """ # Get the list of known pids to ensure we only kill one of those running_data = self.data_storage.running_data self.logger.debug("Attempting to kill PID '" + str(pid) + "'") known_pids = [] # Gets PIDs of all the known unison processes known_pids = [int(running_data[d]['pid']) for d in running_data] # TODO: Rewrite this function, it can probably be done with reduce() # RESOLUTION: Rewritten above, this kept in case it doesn't work # for entry in running_data: # running_data[entry] # known_pids.append(int(running_data[entry]['pid'])) # TODO: Finish this error checking logic here, currently it doesn't check the PID # Try and kill with sigint (same as ctrl+c), if we are allowed to # First make sure the process exists if not psutil.pid_exists(pid): self.logger.info("PID " + str(pid) + " was not found. Perhaps already dead?") return # Then make sure it's a process we started elif pid not in known_pids: shortmsg = ("PID #" + str(pid) + " is not managed by UnisonCTRL. " + "Refusing to kill. See logs for more information.") longmsg = ( "PID #" + str(pid) + " is not managed by UnisonCTRL. " + "Refusing to kill. Your data files are likely corrupted. " + "Kill all running unison instances on this system, " + "delete everything in '" + self.config['running_data_dir'] + "/*', and run UnisonCTRL again.") self.logger.critical(longmsg) raise RuntimeError(shortmsg) # Finally, kill the process if it exists and we started it else: return self.kill_pid(pid) def kill_pid(self, pid): """Kill a process by it's PID. Starts with SIGINT (ctrl + c), then waits 6 seconds, checking every 1/3 second. If it doesn't die after another 6 seconds, it is attempted to be killed with psutil.terminate, then psutil.kill. 
Parameters ---------- int PID of a process to kill Returns ------- None Throws ------- none """ # Ensure it still exists before continuing if not psutil.pid_exists(pid): return # If it did not die nicely, get stronger about killing it p = psutil.Process(pid) # Try terminating, wait 3 seconds to see if it dies p.terminate() # SIGTERM psutil.wait_procs([p], timeout=3) # Ensure it still exists before continuing if not psutil.pid_exists(pid): self.logger.debug("PID " + str(pid) + " was killed with SIGTERM successfully.") return # Try hard killing, wait 3 seconds to see if it dies p.kill() # SIGKILL psutil.wait_procs([p], timeout=3) self.logger.info("PID " + str(pid) + " could not be killed with SIGTERM, and " + "was killed with SIGKILL.") return def cleanup_dead_processes(self): """Ensure all expected processes are still running. Checks the running_data list against the current PID list to ensure all expected processes are still running. Note that if everything works as expected and does not crash, there should never be dead instances. As such, if dead instances appear on a regular basis, consider digging into *why* they are appearing. Parameters ---------- none Returns ------- list PIDs of dead unison instances which we thought were running. Throws ------- none """ # Get the list of processes we know are running and we think are running # Also, convert each PID to int to make sure we can compare actually_running_processes = self.get_running_unison_processes() l = self.data_storage.running_data supposedly_running_processes = [int(l[d]['pid']) for d in l] # Find which instances we think are running but aren't dead_instances = [ x for x in supposedly_running_processes if x not in actually_running_processes ] # Note: if nothing crashes, dead instances should never exist. if (len(dead_instances) > 0): self.logger.warn("Found " + str(len(dead_instances)) + " unexpected dead " + "instances. Cleaning up data files now.") else: self.logger.debug("Found " + str(len(dead_instances)) + " unexpected dead " + "instances to clean up.") # Remove data on dead instances for instance_id in dead_instances: process = self.get_process_info_by_pid(instance_id) self.logger.debug("Removing data on '" + str(process['syncname']) + "' " + "because it is not running as expected.") self.data_storage.remove_data(process['syncname']) def get_process_info_by_pid(self, pid): """Return the syncname of a process given it's PID. Parameters ---------- int PID of desired process Returns ------- dict the full details of the sync process specified by the PID Throws ------- none """ # TODO: discuss if self.logger needs to happen here? I think not? -BY for process in self.data_storage.running_data: if self.data_storage.running_data[process]['pid'] == pid: return self.data_storage.running_data[process] def get_running_unison_processes(self): """Return PIDs of currently running unison instances. 
Parameters ---------- none Returns ------- list[int] PIDs of unison instances, empty list Throws ------- none """ # Get PIDs # Note: throws exception if no instances exist try: pids = str(subprocess.check_output(["pidof", '/usr/bin/unison'])) # Parse command output into list by removing junk chars and exploding # string with space delimiter pids = pids[2:-3].split(' ') except subprocess.CalledProcessError: # If error caught here, no unison instances are found running pids = [] self.logger.debug("Found " + str(len(pids)) + " running instances on this system: PIDs " + ", ".join(pids)) # Return, after converting to ints return list(map(int, pids)) def import_config(self): """Import config from config, and apply details where needed. Parameters ---------- none Returns ------- True if success Throws ------- 'LookupError' if config is invalid. """ # Get the config file import config # Get all keys from keyvalue pairs in the config file settingsFromConfigFile = [ x for x in dir(config) if not x.startswith('__') ] # Convert config file into dict for key in settingsFromConfigFile: value = getattr(config, key) self.config[key] = value # Settings validation: specify keys which are valid settings # If there are rows in the config file which are not listed here, an # error will be raised validSettings = { 'data_dir', 'running_data_dir', 'unison_log_dir', 'unisonctrl_log_dir', 'log_file', 'make_root_directories_if_not_found', 'sync_hierarchy_rules', 'unison_local_root', 'unison_remote_root', 'unison_path', 'global_unison_config_options', 'unison_remote_ssh_conn', 'unison_remote_ssh_keyfile', 'unison_local_hostname', 'unison_home_dir', 'unison_user', 'webhooks', 'rotate_logs', } # If a setting contains a directory path, add it's key here and it will # be sanatized (whitespace and trailing whitespaces stripped) settingPathsToSanitize = { 'data_dir', 'unison_home_dir', 'running_data_dir', 'unison_log_dir', 'unisonctrl_log_dir', } # Values here are used as config values unless overridden in the # config.py file defaultSettings = { 'data_dir': '/tmp/unisonctrl', 'log_file': '/dev/null', 'make_root_directories_if_not_found': True, 'unison_path': '/usr/bin/unison', # Default ubuntu path for unison 'unison_remote_ssh_keyfile': "", 'unison_local_hostname': platform.node(), 'running_data_dir': self.config['data_dir'] + os.sep + "running-sync-instance-information", 'unison_log_dir': self.config['data_dir'] + os.sep + "unison-logs", 'unisonctrl_log_dir': self.config['data_dir'] + os.sep + "unisonctrl-logs", 'unison_user': getpass.getuser(), 'rotate_logs': "time", } # TODO: Implement allowedSettings, which force settings to be # in a given list of options # Apply default settings to fill gaps between explicitly set ones for key in defaultSettings: if (key not in self.config): self.config[key] = defaultSettings[key] # Ensure all required keys are specified for key in validSettings: if (key not in self.config): raise LookupError("Required config entry '" + key + "' not specified") # Ensure no additional keys are specified for key in self.config: if (key not in validSettings): raise LookupError("Unknown config entry: '" + key + "'") # Sanatize directory paths for key in settingPathsToSanitize: self.config[key] = self.sanatize_path(self.config[key]) # If you reach here, configuration was read and imported without error return True def sanatize_path(self, path): """Sanitize directory paths by removing whitespace and trailing slashes. Currently only tested on Unix, but should also work on Windows. 
TODO: Test on windows to ensure it works properly. Parameters ---------- 1) str directory path to sanatize Returns ------- str sanatized directory path Throws ------- none Doctests ------- >>> US = UnisonHandler(False) >>> US.sanatize_path(" /extra/whitespace ") '/extra/whitespace' >>> US.sanatize_path("/dir/with/trailing/slash/") '/dir/with/trailing/slash' >>> US.sanatize_path(" /dir/with/trailing/slash/and/whitepace/ ") '/dir/with/trailing/slash/and/whitepace' >>> US.sanatize_path(" /dir/with/many/trailing/slashes//// ") '/dir/with/many/trailing/slashes' """ # Remove extra whitespace path = path.strip() # Remove slash from end of path path = path.rstrip(os.sep) return path def exit_handler(self): """Is called on exit automatically. Paramaters ------- none Throws ------- none Returns ------- none Throws ------- none Doctests ------- """ self.logger.debug("Starting script shutdown in the class " + self.__class__.__name__) # Clean up dead processes before exiting self.cleanup_dead_processes() """ print("FAKELOG: [" + time.strftime("%c") + "] [UnisonCTRL] Exiting\n") """ self.logger.debug("Script shutdown complete in class " + self.__class__.__name__) self.logger.info("Exiting UnisonCTRL")
from io import StringIO
from collections import namedtuple
import csv
import re
import logging
import json
import datetime

from flask import Flask

from ktokenizer import KTokenizer, tokenize
from compositedict import CompositeDictionary
from datastorage import DataStorage
from settings import Settings
from googledict import GoogleDictionary

app = Flask(__name__)
datastorage = DataStorage("../_kreader_files/kreader.db")
datastorage.create_db()
ktokenizer = None
composite_dict = CompositeDictionary(True)
google_dict = GoogleDictionary()

Textdesc = namedtuple('Textdesc', ['id', 'title', 'total_words', 'unique_words'])
Worddesc = namedtuple('Worddesc', ['id', 'word', 'definitions', 'added_min_ago',
                                   'title', 'left_context', 'context_word',
                                   'right_context'])


@app.teardown_request
def remove_session(ex=None):
    datastorage.remove_session()


@app.route("/")
DailyPrices = DatabaseDailyPrices(base)
DailyPrices.new()
DailyPrices.tickers = [(1, 'BNP', '.PA')]  # ,(2,'GSZ','.PA'),(3,'EDF','.PA')]
DailyPrices.get_prices()
DailyPrices.update_prices()

Trivial = queue.Queue()

DataManager1 = SQLDataManagerBacktest(Trivial, DailyPrices, tickers,
                                      datetime(2014, 1, 1), datetime(2014, 2, 20))
DataManager1.market()

DataStorage1 = DataStorage(tickers)
Strategy1 = BuyandHoldStrategy(DataManager1, DataStorage1, Trivial)
# Strategy1 = MovingAverageStrategy(DataManager1, DataStorage1, Trivial, 5, 10)

while True:
    if DataManager1.continue_backtest == True:
        DataManager1.next_bar()
    else:
        break

    while True:
        try:
            event = Trivial.get(False)
        except queue.Empty:
            break
def averageScanPoints(scan,data,errAbs=None,isRef=None,lpower=None, useRatio=False,funcForAveraging=np.nanmean,chi2_0_max='auto'): """ Average data for equivalent values in 'scan' array given scanpoints in 'scan' and corresponding data in 'data' average all data corresponding the exactly the same scanpoint. If the values in scan are coming from a readback, rounding might be necessary. Parameters ---------- scan : array(N) array of scan points data : array(N,M) array of data to average, first axis correspond to scan index errAbs : None or array as data errbar for each data point. if None take the standard deviation over images in given scan point isRef : None or array(N) if None no reference is subtracted. if array, True indicate that a particular image is a reference one lpower : None or array(N) if not None, time resolved difference or ratio is normalized by it useRatio : bool use True if you want to calculate ratio ( I_{on}/I_{ref} ) instead of I_{on} - I_{off} funcForAveraging: function accepting axis=int keyword argument is usually np.nanmean or np.nanmedian. chi2_0_max = None, "auto" or float simple chi2_0 threshold filter. use trx.filters for more advanced ones. If auto, define max as 95% percentle. if None it is not applied Returns ------- DataStorage instance with all info """ args = dict( isRef = isRef, lpower = lpower, useRatio = useRatio ) data = data.astype(np.float) average = np.mean(data,axis=0) median = np.median(data,axis=0) if isRef is None: isRef = np.zeros( data.shape[0], dtype=bool ) isRef = np.asarray(isRef).astype(bool) assert data.shape[0] == isRef.shape[0], \ "Size mismatch, data is %d, isRef %d"%(data.shape[0],isRef.shape[0]) # subtract reference only is there is at least one if isRef.sum()>0: # create a copy (subtractReferences works in place) diff_all = subtractReferences(data.copy(),np.argwhere(isRef), useRatio=useRatio) ref_average = funcForAveraging(data[isRef],axis=0) else: diff_all = data ref_average = np.zeros_like(average) # normalize signal for laser intensity if provided if lpower is not None: lpower = utils.reshapeToBroadcast(lpower,data) if useRatio is False: diff_all /= lpower else: diff_all = (diff_all-1)/lpower+1 scan_pos = np.unique(scan) shape_out = [len(scan_pos),] + list(diff_all.shape[1:]) diffs = np.empty(shape_out) diff_err = np.empty(shape_out) diffs_in_scan = [] chi2_0 = [] for i,t in enumerate(scan_pos): shot_idx = (scan == t) # & ~isRef if shot_idx.sum() == 0: log.warn("No data to average for scan point %s"%str(t)) # select data for the scan point diff_for_scan = diff_all[shot_idx] if errAbs is not None: noise = np.nanmean(errAbs[shot_idx],axis = 0) else: noise = np.nanstd(diff_for_scan, axis = 0) # if it is the reference take only every second ... 
        if np.all(shot_idx == isRef):
            diff_for_scan = diff_for_scan[::2]
        diffs_in_scan.append(diff_for_scan)

        # calculate average
        diffs[i] = funcForAveraging(diff_for_scan, axis=0)

        # calculate chi2 of different repetitions
        chi2 = np.power((diff_for_scan - diffs[i]) / noise, 2)
        # sum over all axis but first
        for _ in range(diff_for_scan.ndim - 1):
            chi2 = np.nansum(chi2, axis=-1)

        # store chi2_0
        chi2_0.append(chi2 / diffs[i].size)

        # store error of mean
        diff_err[i] = noise / np.sqrt(shot_idx.sum())

    ret = dict(scan=scan_pos, diffs=diffs, err=diff_err,
               chi2_0=chi2_0, diffs_in_scan=diffs_in_scan,
               ref_average=ref_average, diffs_plus_ref=diffs + ref_average,
               average=average, median=median, args=args)
    ret = DataStorage(ret)

    if chi2_0_max is not None:
        ret = filters.chi2Filter(ret, threshold=chi2_0_max)
        ret = filters.applyFilters(ret)

    return ret
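# A minimal self-contained check of averageScanPoints on synthetic data; the
# shapes and delay values are arbitrary, and -10 marks the laser-off/reference
# shots, matching the convention used in doFolder_dataRed above. It assumes
# the surrounding module (subtractReferences, filters, utils) is importable.
nshots, npoints = 20, 100
scan = np.repeat([-10.0, 1e-9, 2e-9, 5e-9], nshots // 4)
data = np.random.normal(loc=1.0, size=(nshots, npoints))
isRef = scan == -10.0
res = averageScanPoints(scan, data, isRef=isRef, chi2_0_max=None)
print(res.scan)            # unique scan points
print(res.diffs.shape)     # (n_scan_points, npoints)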
def find_center_liquid_peak(img, X=None, Y=None, mask=None, percentile=(90, 99), plot=False): """ Find beam center position fitting a ring (usually liquid peak) Parameters ========== img: array or string image to use, if string, reads it with fabio X,Y: None or arrays position of center of pixels, if given they will have to have same shape as img, if None, they will be created with meshgrid mask: boolean mask or something trx.mask.interpretMasks can understand True are pixels to mask out percentile: tuple range of intensity to use (in percentile values) """ # interpret inputs img = _prepare_img(img) mask = _prepare_mask(mask, img) if mask is not None and not isinstance(mask, np.ndarray): mask = interpretMasks(mask, img.shape) if mask is not None: img[mask] = np.nan zmin, zmax = np.nanpercentile(img.ravel(), percentile[:2]) shape = img.shape idx = (img >= zmin) & (img <= zmax) if X is None or Y is None: _use_imshow = True X, Y = np.meshgrid(range(shape[1]), range(shape[0])) else: _use_imshow = False xfit = X[idx].ravel() yfit = Y[idx].ravel() fit = leastsq_circle(xfit, yfit) xc = fit.center[0] yc = fit.center[1] R = fit.radius if plot: ax = plt.gca() cmin = np.nanmin(img) if _use_imshow: plt.imshow(img, clim=(cmin, zmin), cmap=plt.cm.gray) # RGBA img = np.zeros((img.shape[0], img.shape[1], 4)) img[idx, 0] = 1 img[idx, 3] = 1 plt.imshow(img) else: plt.pcolormesh(X, Y, img, cmap=plt.cm.gray, vmin=cmin, vmax=zmin) img = np.ma.masked_array(img, idx) plt.pcolormesh(X, Y, img) circle = plt.Circle((xc, yc), radius=R, lw=5, color='green', fill=False) ax.add_artist(circle) plt.plot(xc, yc, "o", color="green", markersize=5) return DataStorage(xc=xc, yc=yc, R=R, x=xfit, y=yfit)
        stats = get_min_max_avg(relevant_files)
        print_stats_table(stats)
    elif len(sys.argv) > 1 and sys.argv[1] == "ranges":
        print_ranges()
    elif len(sys.argv) > 1 and sys.argv[1] == "dbm":
        print("Converting sky/z1 files to dbm")
        convert_to_dbm()
    elif len(sys.argv) > 1 and sys.argv[1] == "id":
        print(equalize_node_ids(sys.argv[2], sys.argv[3]))
    elif len(sys.argv) > 1 and sys.argv[1] == "lineplots":
        arguments = parse_arguments()
        storage = DataStorage()  # an ordered dict {"channel": [([txpowers], [values])]}
        for platform in platforms:
            arguments["platform"] = platform
            rel_chans = ["12", "18", "25", "26"]
            for channel in range(11, 27):
                arguments["channel"] = str(channel)
                # connecting a list of txpowers
                for txpower in txpowers[arguments["platform"]]:
                    arguments["txpower"] = str(txpower)
                    print(arguments["platform"], arguments["channel"], arguments["txpower"])
    u'\u03B3': 'gamma',
    u'\u03B4': 'delta',
    u'\u03B5': 'epsilon',
    u'\u03B6': 'zeta',
    u'\u03B7': 'eta',
    u'\u03B8': 'theta',
    u'\u03B9': 'iota',
    u'\u03BA': 'kappa',
    u'\u03BB': 'lamda',
    u'\u03BC': 'mu',
    u'\u03BD': 'nu',
    u'\u03BE': 'xi',
    u'\u03BF': 'omicron',
    u'\u03C0': 'pi',
    u'\u03C1': 'rho',
    u'\u03C3': 'sigma',
    u'\u03C4': 'tau',
    u'\u03C5': 'upsilon',
    u'\u03C6': 'phi',
    u'\u03C7': 'chi',
    u'\u03C8': 'psi',
    u'\u03C9': 'omega',
}

# invert key, value
alphabet = dict((name, uni) for (uni, name) in _greek_unicode_to_name.items())

# convenient ...
if _has_datastorage:
    alphabet = DataStorage(alphabet)
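# Quick usage sketch: the table maps Greek unicode characters to names, and
# `alphabet` is the inverse mapping from name to character:
print(alphabet['pi'])       # -> 'π'
print(alphabet['omega'])    # -> 'ω'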
def find_center_using_rings(img, sigma=3, high_threshold=20,
                            low_threshold=10, mask=None, center=None,
                            nrings=10, min_dist=100, max_peak_width=60,
                            use_ellipse=False, plot=True, clim="auto",
                            verbose=False, reprocess=False):
    """ Find beam center position finding powder rings and fitting them

        This function tries to automatically find the powder peaks.
        It uses several steps:
        1. finds pixels belonging to a sharp peak by using skimage.canny
        2. given an initial guess of the center, it builds a distance histogram
        3. finds peaks in the histogram that should represent the edges
           of a powder ring
        4. fits pixels belonging to each peak (i.e. pixels within a ring)
           to a circle/ellipse
        5. does some sanity checks before each fit (minimum number of pixels,
           width of the peak, etc) and after (center cannot move too much)
        6. uses the previously found centers (median value) to build the
           distance histogram of pixels found by the canny filter

        Parameters
        ==========
        img: array or string
          image to use, if string, reads it with fabio
        sigma: float
          used by canny filter, see skimage.feature.canny doc
        {low|high}_threshold: float
          used by canny filter, see skimage.feature.canny. In general
          low=10, high=20 seems to work very well for weak and strong
          intensity images
        mask: boolean mask or something trx.mask.interpretMasks can understand
          True are pixels to mask out
        center: None or tuple
          if tuple, use it as first guess of the center (has to be good to a
          few tens of pixels); if None, it proposes a "click" method
        nrings: int
          number of rings to look for; it can be high, if fewer peaks are
          found it will not bug out
        min_dist: float
          minimum distance to look for peaks, to avoid possible high intensity
          close to the beam or beamstop
        max_peak_width: float
          do not even try to fit peaks that are broader than max_peak_width
        use_ellipse: bool
          if True fits with ellipse, else uses circle
        plot: bool
          if True plots rings, histograms and 2d integration (quite useful)
        clim: "auto" or tuple
          color scale to use for plots, if auto uses the 10%,95% percentiles
        verbose: bool
          increases verbosity (possibly too much !)
        reprocess: bool
          if True, at the end of the fits of all rings, it reruns with the
          current best estimate (median of centers) to find other peaks that
          could not be identified with the initial guess
    """
    # interpret inputs
    img = _prepare_img(img)
    mask = _prepare_mask(mask, img)
    if isinstance(clim, str) and clim == "auto":
        if mask is None:
            clim = np.percentile(img, (10, 95))
        else:
            clim = np.percentile(img[~mask], (10, 95))
    if center is None:
        plt.figure("Pick center")
        plt.imshow(img, clim=clim)
        print("Click approximate center")
        center = plt.ginput(1)[0]
    # use skimage canny filter
    # note: mask is "inverted" to be consistent with trx convention: True
    # are masked out
    edges = feature.canny(img, sigma, low_threshold=low_threshold,
                          high_threshold=high_threshold, mask=~mask)
    points = np.array(np.nonzero(edges)).T
    if points.shape[0] == 0:
        raise ValueError("Could not find any points, try changing the "
                         "threshold or the initial center")
    else:
        print("Found %d points" % points.shape[0])
    # swap x,y
    points = points[:, ::-1]
    image = np.zeros_like(img)
    if plot:
        plt.figure("fit ring")
        plt.imshow(img, clim=clim, cmap=plt.cm.gray_r)
    colors = plt.rcParams['axes.prop_cycle']
    storage_fit = []
    last_n_peaks = 0
    for i, color in zip(range(nrings), colors):
        ## find points in a given circle based on histogram of distances ...
        # dist is calculated here because we can use the previous cycle to
        # improve the peak/background separation
        dist = np.linalg.norm(points - center, axis=1).ravel()
        if plot:
            plt.figure("hist")
            plt.hist(dist, 1000, histtype='step',
                     label="ring %d" % (i + 1), **color)
        ## next is how to find the regions of the histogram that should
        # represent peaks ...
        # we can start by some smoothing
        dist_hist, bins = np.histogram(dist,
                                       bins=np.arange(min_dist, dist.max()))
        bins_center = (bins[1:] + bins[:-1]) / 2
        N = sigma * 2
        # use triangular kernel
        kernel = np.concatenate(
            (np.arange(int(N / 2)), N / 2 - np.arange(int(N / 2) + 1)))
        # normalize it
        kernel = kernel / (N**2) / 4
        dist_hist_smooth = np.convolve(dist_hist, kernel, mode='same')
        if plot:
            temp = dist_hist_smooth / dist_hist_smooth.max() * dist_hist.max()
            plt.plot(bins_center, temp, '--', **color)
        peaks_ranges = find_hist_ranges(dist_hist_smooth, x=bins_center,
                                        max_frac=0.1)
        n_peaks = peaks_ranges.shape[0]
        if verbose:
            peaks_ranges_str = map(str, peaks_ranges)
            peaks_ranges_str = ",".join(peaks_ranges_str)
            print("Iteration %d, found %d peaks, ranges %s" %
                  (i, n_peaks, peaks_ranges_str))
        if i >= n_peaks:
            print("asked for more peaks than found, stopping")
            break
        idx = (dist > peaks_ranges[i, 0]) & (dist < peaks_ranges[i, 1])
        # log.debug("dist_range",dist_range,idx.sum(),idx.shape)
        # sanity checks
        if points[idx].shape[0] == 0:
            print("No point for circle", i)
            continue
        if points[idx].shape[0] < 20:
            print("Too few points to try fit, skipping to next circle")
            continue
        peak_width = peaks_ranges[i][1] - peaks_ranges[i][0]
        if peak_width > max_peak_width:
            print("Peak %d seems too large (%.0f pixels), skipping" %
                  (i, peak_width))
            continue
        else:
            if verbose:
                print("Peak %d width %.0f pixels" % (i, peak_width))
        ## Do fit
        try:
            if use_ellipse:
                fit = fit_ellipse(points[idx, 0], points[idx, 1])
            else:
                fit = leastsq_circle(points[idx, 0], points[idx, 1])
        except (TypeError, np.linalg.LinAlgError):
            print("Fit failed for peak", i)
            continue
        # prevent outliers from messing up the next circle
        is_ok = (n_peaks >= last_n_peaks - 2) & \
                (np.linalg.norm(fit.center - center) < 50)
        if not is_ok:
            continue
        center = fit.center  # model_robust.params[0], model_robust.params[1]
        last_n_peaks = n_peaks
        storage_fit.append(fit)
        ## prepare out
        if use_ellipse:
            out_string = "ring %s" % (i + 1)
            out_string += " center: %.3f %.3f" % tuple(fit.center)
            out_string += " axis  : %.3f %.3f" % tuple(fit.axis)
            out_string += " angle : %+.1f" % fit.angle
        else:
            out_string = "ring %s" % (i + 1)
            out_string += " center: %.3f %.3f" % tuple(fit.center)
            out_string += " radius : %.3f" % fit.radius
        print(out_string)
        if plot:
            plt.figure("fit ring")
            # plt.imshow(image)
            plt.plot(points[idx, 0], points[idx, 1], 'b.',
                     markersize=1, **color)
            plt.plot(center[0], center[1], ".", markersize=20, **color)
            circle = plt.Circle(fit.center, radius=fit.radius,
                                fill=False, **color)
            ax = plt.gca()
            ax.add_patch(circle)
            plt.pause(0.01)
    # package output
    out = DataStorage()
    for key in storage_fit[0].keys():
        out[key] = np.asarray([f[key] for f in storage_fit])
        out["%s_median" % key] = np.median(out[key], axis=0)
        out["%s_rms" % key] = np.std(out[key], axis=0)
    if plot:
        ai = azav.getAI(xcen=out["center_median"][0],
                        ycen=out["center_median"][1],
                        pixel=1e-3, distance=0.2)
        plt.figure("2D integration")
        x, y, i2d = azav.do2d(ai, img, mask=mask, unit="r_mm")
        vmin, vmax = np.percentile(i2d, (20, 90))
        plt.pcolormesh(x, y, i2d, vmin=vmin, vmax=vmax)
    if reprocess:
        plt.close("all")
        return find_center_using_rings(img, sigma=sigma,
                                       high_threshold=high_threshold,
                                       low_threshold=low_threshold,
                                       mask=mask, plot=plot,
                                       center=out["center_median"],
                                       nrings=nrings, clim=clim,
                                       min_dist=min_dist,
                                       use_ellipse=use_ellipse,
                                       verbose=verbose, reprocess=False)
    return out
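# A minimal usage sketch (hypothetical, not from the original code): the file
# name, initial center guess and parameter values below are illustrative
# placeholders chosen to match the docstring's recommendations.
rings = find_center_using_rings("calibrant_powder.edf", sigma=3,
                                low_threshold=10, high_threshold=20,
                                center=(1020, 980), nrings=6,
                                plot=False, reprocess=True)
print(rings["center_median"])  # median of the per-ring fitted centers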