def getDateAndTimeByKey(parameters, dt_key, task_id):
    """Read an optional date/time pair from the task parameters.

    Looks up the keys ``date<dt_key>`` and ``time<dt_key>`` (e.g.
    ``dateFrom``/``timeFrom``) and parses each one independently.
    Values that fail to parse are logged with the task id and treated
    as absent.

    Returns:
        (dateValue, timeValue) tuple; either element is None when the
        corresponding key is missing, empty, or unparseable.
    """
    date_key = "".join(["date", dt_key])
    time_key = "".join(["time", dt_key])
    dateValue = None
    timeValue = None

    raw_date = parameters.get(date_key)
    if raw_date:
        try:
            dateValue = parse(raw_date).date()
        except ValueError:
            Logger.warn(
                f"Bad date format. Continue running task without it. task_id: {task_id} \n"
                f"key=<{date_key}>; value=<{raw_date}>"
            )

    raw_time = parameters.get(time_key)
    if raw_time:
        try:
            # Prepend a fixed dummy date so the parser accepts a bare
            # time-of-day string; only the time component is kept.
            timeValue = parse(" ".join(["1970-01-01", raw_time])).time()
        except ValueError:
            Logger.warn(
                f"Bad time format. Continue running task without it. task_id: {task_id} \n"
                f"key=<{time_key}>; value=<{raw_time}>"
            )

    Logger.debug(dateValue)
    Logger.debug(timeValue)
    return (dateValue, timeValue)
def get_overlapping_paths(log: Logger, param_path: Path, compare_paths: List[Path], buffer_m: int = None) -> List[Path]:
    """Returns [compare_paths] that are within a buffered geometry of [param_path].

    Args:
        log: Logger used for debug output.
        param_path: The path whose buffered geometry is the reference.
        compare_paths: Candidate paths to test against the buffer.
        buffer_m: Buffer size in meters applied to param_path's geometry.

    Returns:
        A list always starting with param_path itself, followed by every
        candidate (by distinct name) whose geometry lies within the buffer.
    """
    overlapping_paths = [param_path]
    path_geom_buff = param_path.geometry.buffer(buffer_m)
    for compare_path in compare_paths:
        # Skip the reference path itself (matched by name).
        if compare_path.name == param_path.name:
            continue
        # FIX: direct truthiness test instead of "== True" comparison;
        # also avoids building a throwaway filtered list per call.
        if compare_path.geometry.within(path_geom_buff):
            overlapping_paths.append(compare_path)
    if len(overlapping_paths) > 1:
        log.debug(
            f'found {len(overlapping_paths)} overlapping paths for: '
            f'{param_path.name} - {[path.name for path in overlapping_paths]}')
    return overlapping_paths
def get_unique_paths_by_geom_overlay(log: Logger, all_paths: List[Path], buffer_m: int = None, cost_attr: str = 'nei_norm') -> List[str]:
    """Pick a geometrically unique subset of paths by buffered overlay.

    Walks through all_paths, groups together paths whose buffered line
    geometries overlap within buffer_m meters, and keeps only the name of
    the best (lowest cost_attr) path of each group.

    Args:
        all_paths: Both short and green paths.
        buffer_m: Buffer size in meters used when comparing geometries.
        cost_attr: Cost attribute minimized when choosing within a group.

    Note:
        A shortest path is dropped when an overlapping green path replaces
        it as the group's best.

    Returns:
        Names of the selected, nearly geometry-unique paths, or None when
        only a single path was given.
    """
    if len(all_paths) == 1:
        return None
    covered_names = []   # names already part of some processed overlap group
    unique_names = []    # names selected as unique so far (result)
    for current in all_paths:
        # Skip paths that were already selected or already grouped.
        if current.name in unique_names or current.name in covered_names:
            continue
        candidates = get_path_overlay_candidates_by_len(
            current, all_paths, len_diff=25)
        group = get_overlapping_paths(log, current, candidates, buffer_m)
        best = get_least_cost_path(group, cost_attr=cost_attr)
        if best.name not in unique_names:
            unique_names.append(best.name)
        covered_names += [member.name for member in group]
    log.debug('filtered ' + str(len(unique_names)) +
              ' unique paths from ' + str(len(all_paths)) +
              ' unique paths by overlay')
    return unique_names
def uploadToDB(df):
    """Insert the rows of *df* into the Data table in chunks of
    ``chunkSize`` (module-level constant), updating existing rows on
    duplicate keys.

    Raises:
        RuntimeError: when a chunk fails to insert because of invalid
            data (wraps InternalError/DataError from the driver), with
            the offending row range in the message.
    """
    def insertChunk(inserterStatemant):
        # Map every known Data column that appears in the input rows
        # (plus 'updated') to its incoming INSERT value, so duplicates
        # get overwritten — presumably MySQL's ON DUPLICATE KEY UPDATE
        # via SQLAlchemy's `.inserted` accessor (TODO confirm dialect).
        for key in data_keys.keys():
            if key in dicts[0].keys() or key == 'updated':
                try:
                    norm_data_keys[key] = getattr(inserterStatemant.inserted, key)
                except AttributeError:
                    # Column not present on this INSERT statement — skip.
                    pass
        inserterStatemant = inserterStatemant.on_duplicate_key_update(
            norm_data_keys
        )
        try:
            db.session.execute(inserterStatemant)
        except (InternalError, DataError):
            # last_boarder is read from the enclosing scope; it is updated
            # only after a successful chunk, so the reported range matches
            # the chunk that just failed.
            raise RuntimeError(f"Data is not valid. "
                               f"Problem finded in range "
                               f"({last_boarder}, {min(last_boarder + chunkSize, len(dicts))}). "
                               f"Try to check matching of values with its column names. "
                               f"If you haven't find the problem, try to check the data in specified range.")

    dicts = df.to_dict('records')
    data_schema = DataSchema()
    # Serialize an empty Data() instance to learn the table's column names.
    data_keys = data_schema.dump(Data()).data
    norm_data_keys = {}
    last_boarder = 0
    Logger.debug("Prepare to download")
    # Insert all full chunks of chunkSize rows.
    for i in range(chunkSize, len(dicts), chunkSize):
        inserterStatemant = insert(Data.__table__).values(
            dicts[i - chunkSize:i]
        )
        insertChunk(inserterStatemant)
        Logger.debug(f"Downloaded chunk. Summary: {i}")
        last_boarder = i
    # Insert the trailing partial chunk, if any rows remain.
    if last_boarder != len(dicts):
        inserterStatemant = insert(Data.__table__).values(
            dicts[last_boarder:len(dicts)]
        )
        insertChunk(inserterStatemant)
        Logger.debug(f"Downloaded chunk. Summary: {len(dicts)}")
def addWhereToExpression(expr, tickers, dateFrom, timeFrom, dateTo, timeTo):
    """Append a WHERE clause filtering by tickers and a date/time range.

    Produces SQL of the shape:
        WHERE ((ticker='A' OR ticker='B') AND (<date/time range>))
    where the range is expressed as three OR-ed groups: days strictly
    between the bounds, the left boundary day, and the right boundary day.

    Args:
        expr: The expression built so far (e.g. "SELECT * FROM Data").
        tickers: Optional list of ticker symbols.
        dateFrom/timeFrom, dateTo/timeTo: Optional range bounds; a missing
            time defaults to 00:00:00 / 23:59:59 on its boundary day.

    Returns:
        The extended expression, or None when the combination is
        contradictory (a time bound on a side with no date while the other
        date is set, or an inverted range).
    """
    expr = " ".join([expr, "WHERE "])
    expr = "".join([expr, "("])
    if tickers:
        expr = "".join([expr, "("])
        for i in range(len(tickers)):
            if i != 0:
                expr = " ".join([expr, "OR", f"ticker='{tickers[i]}'"])
            else:
                expr = "".join([expr, f"ticker='{tickers[i]}'"])
        expr = "".join([expr, ")"])
    if dateFrom or dateTo or timeFrom or timeTo:
        # FIX: only emit "AND" when a ticker group precedes the date/time
        # group; the original produced "WHERE ( AND (..." for
        # dates-only queries.
        if tickers:
            expr = " ".join([expr, "AND", "("])
        else:
            expr = "".join([expr, "("])
    needOR = False
    needAND = False
    # A time bound on the side whose date is missing (while the other
    # date is present) makes the range ambiguous -> reject.
    if (dateFrom and not dateTo and timeTo) or \
            (dateTo and not dateFrom and timeFrom):
        return None
    if dateFrom and not timeFrom:
        timeFrom = datetime.time(0, 0, 0)
    if dateTo and not timeTo:
        timeTo = datetime.time(23, 59, 59)
    Logger.debug(dateFrom)
    Logger.debug(dateTo)
    Logger.debug(timeFrom)
    Logger.debug(timeTo)
    if dateFrom and dateTo:
        # Inverted range -> reject.
        if dateFrom > dateTo or (dateFrom == dateTo and timeFrom > timeTo):
            return None
    # Days strictly between the bounds (skipped for a single-day range).
    if not dateFrom or not dateTo or dateTo != dateFrom:
        if dateFrom or dateTo:
            expr = "".join([expr, "("])
        if dateFrom:
            expr = "".join(
                [expr, f"date>'{dateFrom.strftime('%Y-%m-%d')}'"])
            needAND = True
        if dateTo:
            if needAND:
                expr = " ".join([expr, "AND"])
            expr = " ".join(
                [expr, f"date<'{dateTo.strftime('%Y-%m-%d')}'"])
        if dateFrom or dateTo:
            expr = "".join([expr, ")"])
            needOR = True
            needAND = False
    # Left boundary day: date == dateFrom AND time >= timeFrom.
    if dateFrom or timeFrom:
        if needOR:
            expr = " ".join([expr, "OR"])
            needOR = False
        expr = " ".join([expr, "("])
        if dateFrom:
            expr = "".join([expr, f"date='{dateFrom.strftime('%Y-%m-%d')}'"])
            needAND = True
        if timeFrom:
            if needAND:
                expr = " ".join([expr, "AND"])
                needAND = False
            expr = " ".join([expr, f"time>='{timeFrom.strftime('%H:%M:%S')}'"])
            needAND = True
        if dateFrom and dateTo and dateTo == dateFrom:
            # Single-day range: also close it with the upper time bound.
            if needAND:
                expr = " ".join([expr, "AND"])
                needAND = False
            expr = " ".join([expr, f"time<='{timeTo.strftime('%H:%M:%S')}'"])
            needAND = True
        expr = "".join([expr, ")"])
        needOR = True
        needAND = False
    # Right boundary day: date == dateTo AND time <= timeTo.
    if not dateFrom or not dateTo or dateTo != dateFrom:
        if dateTo or timeTo:
            if needOR:
                expr = " ".join([expr, "OR"])
                needOR = False
            expr = " ".join([expr, "("])
            if dateTo:
                expr = "".join([expr, f"date='{dateTo.strftime('%Y-%m-%d')}'"])
                needAND = True
            if timeTo:
                if needAND:
                    expr = " ".join([expr, "AND"])
                expr = " ".join(
                    [expr, f"time<='{timeTo.strftime('%H:%M:%S')}'"])
            expr = "".join([expr, ")"])
            needOR = True
            needAND = False
    if dateFrom or dateTo or timeFrom or timeTo:
        # FIX: close the date/time group only when one was opened; the
        # original appended this ")" unconditionally, producing unbalanced
        # parentheses for ticker-only queries.
        expr = " ".join([expr, ")"])
    expr = "".join([expr, ")"])
    return expr
def ml_task_runner(self, parameters):
    """Celery task: import user Model/Preprocessor files, query Data rows,
    run the prediction pipeline, persist the result and mail it.

    Args:
        parameters: dict with keys 'model', 'preprocessor', 'resource',
            'personEmail', optional 'ticker' and date/time filter keys
            ('dateFrom'/'timeFrom'/'dateTo'/'timeTo').

    Returns:
        Final progress dict including the prediction under 'result'.

    Raises:
        RuntimeError: on any failed stage; the failure is logged and
            mailed via celeryLogFailAndEmail first.
    """
    start_time = time.perf_counter()
    self.update_state(state='PROGRESS',
                      meta={
                          'current': 1,
                          'total': 100,
                          'status': 'Importing started.'
                      })
    importlib.invalidate_caches()
    # Dynamically import the user-supplied Model class from its file path.
    try:
        model_spec = importlib.util.spec_from_file_location(
            'Model', parameters.get('model'))
        class_model = importlib.util.module_from_spec(model_spec)
        model_spec.loader.exec_module(class_model)
        model = class_model.Model()
    except (ImportError, AttributeError, TypeError) as ex:
        celeryLogFailAndEmail(self.request.id, start_time,
                              parameters.get('personEmail'),
                              type(ex).__name__)
        # FIX: this branch loads the model, not the preprocessor; the
        # original raised RuntimeError("Bad preprocessor file.") here.
        raise RuntimeError("Bad model file.") from ex
    # Dynamically import the user-supplied Preprocessor class.
    try:
        prep_spec = importlib.util.spec_from_file_location(
            'Preprocessor', parameters.get('preprocessor'))
        class_prep = importlib.util.module_from_spec(prep_spec)
        prep_spec.loader.exec_module(class_prep)
        prep = class_prep.Preprocessor()
    except (ImportError, AttributeError, TypeError) as ex:
        celeryLogFailAndEmail(self.request.id, start_time,
                              parameters.get('personEmail'),
                              type(ex).__name__)
        raise RuntimeError("Bad preprocessor file.") from ex
    self.update_state(state='PROGRESS',
                      meta={
                          'current': 3,
                          'total': 100,
                          'status': 'Importing finished. Receiving cursor.'
                      })
    # Build the SELECT expression from the optional filter parameters.
    dt_key = 'From'
    (dateFrom, timeFrom) = getDateAndTimeByKey(parameters, dt_key,
                                               self.request.id)
    dt_key = 'To'
    (dateTo, timeTo) = getDateAndTimeByKey(parameters, dt_key,
                                           self.request.id)
    tickers = parameters.get('ticker')
    if tickers and not isinstance(tickers, list):
        tickers = [tickers]
    expr = "SELECT * FROM Data"
    if tickers or dateFrom or timeFrom or dateTo or timeTo:
        expr = addWhereToExpression(expr, tickers, dateFrom, timeFrom,
                                    dateTo, timeTo)
        if not expr:
            # addWhereToExpression returned None: contradictory filters.
            celeryLogFailAndEmail(self.request.id, start_time,
                                  parameters.get('personEmail'),
                                  "CombineError(Not an exception)")
            raise RuntimeError("Bad structure of finder fields.")
    expr = " ".join([expr, "ORDER BY date DESC, time DESC;"])
    Logger.debug(expr)
    from app.fl_app import application
    with application.app_context():
        try:
            # NOTE(review): the raw connection is never closed; consider
            # closing it once preprocessing is done to avoid leaking
            # pool connections — confirm against prep.preprocess usage.
            cursor = db.engine.raw_connection().cursor()
            cursor.execute(expr)
        except Exception as ex:
            celeryLogFailAndEmail(self.request.id, start_time,
                                  parameters.get('personEmail'),
                                  type(ex).__name__)
            raise RuntimeError(
                "Exception during getting cursor from database.") from ex
    self.update_state(state='PROGRESS',
                      meta={
                          'current': 10,
                          'total': 100,
                          'status': 'Cursor has been received. Preprocessing started.'
                      })
    try:
        data = prep.preprocess(cursor)
    except (AttributeError, TypeError) as ex:
        celeryLogFailAndEmail(self.request.id, start_time,
                              parameters.get('personEmail'),
                              type(ex).__name__)
        raise RuntimeError("Bad preprocessor file.") from ex
    except Exception as ex:
        celeryLogFailAndEmail(self.request.id, start_time,
                              parameters.get('personEmail'),
                              type(ex).__name__)
        raise RuntimeError("Exception in runtime of preprocessing.") from ex
    self.update_state(state='PROGRESS',
                      meta={
                          'current': 35,
                          'total': 100,
                          'status': 'Preprocessing finished. Loading data started.'
                      })
    try:
        model.load(parameters.get('resource'))
    except (AttributeError, TypeError) as ex:
        celeryLogFailAndEmail(self.request.id, start_time,
                              parameters.get('personEmail'),
                              type(ex).__name__)
        raise RuntimeError("Bad model file.") from ex
    except Exception as ex:
        celeryLogFailAndEmail(self.request.id, start_time,
                              parameters.get('personEmail'),
                              type(ex).__name__)
        raise RuntimeError("Exception in runtime of loading data.") from ex
    self.update_state(state='PROGRESS',
                      meta={
                          'current': 65,
                          'total': 100,
                          'status': 'Loading data finished. Predicting started.'
                      })
    try:
        prediction = model.predict(data)
    except (AttributeError, TypeError) as ex:
        celeryLogFailAndEmail(self.request.id, start_time,
                              parameters.get('personEmail'),
                              type(ex).__name__)
        raise RuntimeError("Bad model file.") from ex
    except Exception as ex:
        celeryLogFailAndEmail(self.request.id, start_time,
                              parameters.get('personEmail'),
                              type(ex).__name__)
        raise RuntimeError("Exception in runtime of predicting.") from ex
    self.update_state(state='PROGRESS',
                      meta={
                          'current': 93,
                          'total': 100,
                          'status': 'Prediction made. Adding it to database.'
                      })
    # Insert the prediction into the database.
    with application.app_context():
        try:
            with transaction():
                full_result = Results()
                full_result.model = os.path.basename(parameters.get('model'))
                full_result.preprocessor = os.path.basename(
                    parameters.get('preprocessor'))
                full_result.resource = os.path.basename(
                    parameters.get('resource'))
                full_result.personEmail = parameters.get('personEmail')
                for key in prediction.keys():
                    try:
                        setattr(full_result, key, prediction.get(key))
                    except (AttributeError, TypeError):
                        # Unknown/invalid column: log and keep going.
                        Logger.warn(
                            f"Wrong attribute or type in predicton. "
                            f"Continue running task. task_id: {self.request.id} \n"
                            f"key=<{key}>; value=<{prediction.get(key)}>")
                db.session.add(full_result)
        except Exception as ex:
            celeryLogFailAndEmail(self.request.id, start_time,
                                  parameters.get('personEmail'),
                                  type(ex).__name__)
            raise RuntimeError(
                "Exception during insertion into database.") from ex
        db.session.commit()
    self.update_state(state='PROGRESS',
                      meta={
                          'current': 97,
                          'total': 100,
                          'status': 'Inserting to database finished. Sending mail'
                      })
    # Mail the successful result to the requester.
    celeryLogSuccessAndEmail(self.request.id, start_time,
                             parameters.get('personEmail'), prediction)
    return {
        'current': 100,
        'total': 100,
        'status': 'Task completed!',
        'result': prediction
    }