def on_message(client, userdata, msg):
    """MQTT on_message callback: persist an incoming measurement.

    Arguments:
        client   -- the MQTT client instance (unused here)
        userdata -- the private user data set on the client (unused here)
        msg      -- the received message; its payload is a JSON document
    """
    data = json.loads(msg.payload.decode())
    data_point = DataPoint(
        identifier=data['id'],
        date=datetime.fromisoformat(data['content']['time_of_measurement']),
        temperature_f=data['content']['temperature_f'],
        temperature_c=data['content']['temperature_c'],
        type=data['type'])
    session.add(data_point)
    session.commit()
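# Usage sketch (an assumption, not part of the original module): wiring the
# callback into a paho-mqtt client. The broker host/port and the topic name
# "sensors/#" are illustrative placeholders.
def run_subscriber():
    import paho.mqtt.client as mqtt

    mqtt_client = mqtt.Client()          # paho-mqtt 1.x constructor signature
    mqtt_client.on_message = on_message  # persist every message via on_message
    mqtt_client.connect("localhost", 1883)
    mqtt_client.subscribe("sensors/#")
    mqtt_client.loop_forever()           # block and dispatch incoming messages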
def submit_metric():
    """Submit metric data for a specific computer.

    If no timeline exists for the computer yet, one is created.
    """
    # The request body arrives as a JSON-encoded string, so the already-parsed
    # body from get_json() has to be decoded a second time.
    gson = json.loads(request.get_json())
    new_point = DataPoint(computer_name=gson["computer_name"],
                          cpu_percentage=gson["cpu_percentage"],
                          memory_percentage=gson["memory_percentage"],
                          timestamp=gson["timestamp"])
    with lock:
        if not instances.get(new_point.computer_name):
            # COLLECTOR_BUFFER_SIZE must be set in the environment;
            # int(None) would raise otherwise.
            instances[new_point.computer_name] = Timeline(
                maxsize=int(os.environ.get("COLLECTOR_BUFFER_SIZE")))
        instances[new_point.computer_name].append(new_point)
    return Response(status=200)
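# Client-side sketch (the URL and field values are assumptions, for
# illustration only). Note the payload is serialised to a string first,
# because submit_metric() calls json.loads() on the already-parsed body.
import json
import requests

payload = {"computer_name": "host-01", "cpu_percentage": 42.0,
           "memory_percentage": 63.5, "timestamp": "2021-01-01T12:00:00"}
requests.post("http://localhost:5000/submit_metric", json=json.dumps(payload))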
def get_post_data():
    if request.method == "GET":
        # We're sending all of our data to the user!
        data = session.query(DataPoint).all()
        return jsonify([i.serialize for i in data])
    elif request.method == "POST":
        # There's a new data point in town!
        name = request.form.get("name") or "Anonymous"
        Rstar = request.form.get("Rstar", type=float)
        fp = request.form.get("fp", type=float)
        ne = request.form.get("ne", type=float)
        fl = request.form.get("fl", type=float)
        fi = request.form.get("fi", type=float)
        fc = request.form.get("fc", type=float)
        L = request.form.get("L", type=float)
        # Drake equation: N = R* * fp * ne * fl * fi * fc * L
        N = Rstar * fp * ne * fl * fi * fc * L
        new_data = DataPoint(name=name, N=N, Rstar=Rstar, fp=fp, ne=ne,
                             fl=fl, fi=fi, fc=fc, L=L)
        session.add(new_data)
        session.commit()
        return jsonify(new_data.serialize)
    else:
        # The only two requests that we support are "GET" and "POST".
        return "Unsupported HTTP request", 400
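# Worked example (the route path "/data" and the parameter values are
# illustrative assumptions). With these inputs the endpoint computes
# N = 7 * 0.5 * 2 * 1 * 0.1 * 0.1 * 1000 = 70.
import requests

form = {"name": "Example", "Rstar": 7, "fp": 0.5, "ne": 2,
        "fl": 1, "fi": 0.1, "fc": 0.1, "L": 1000}
requests.post("http://localhost:5000/data", data=form)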
def main(csvfile):
    # TODO use Pandas
    reader = csv.reader(csvfile, delimiter=',', quotechar='"')
    points = []
    feature_vectors = []
    idxs = set()
    names = set()
    preexist = set(
        FeatureVector.objects.all().values_list("exact_name", flat=True))
    now = timezone.now()
    count = 0
    for i, row in enumerate(reader):
        if not i:
            # The first row is the header; build the name -> index mapping.
            mapping = get_mapping(row)
            continue
        if row == [] or len(row) < max(mapping.values()):
            continue
        try:
            try:
                exact_name = get_exact_name(row[mapping["Name"]])
                try:
                    decay_feature = get_decay_feature_vector(exact_name)
                    feature_vector = True
                    if exact_name not in names and exact_name not in preexist:
                        temp = FeatureVector(exact_name=exact_name,
                                             type=FeatureVector.DECAY,
                                             vector=decay_feature,
                                             created=now)
                        temp.clean_fields()
                        feature_vectors.append(temp)
                        names.add(exact_name)
                    if len(feature_vectors) > 150:
                        # Flush in batches to bound memory use.
                        FeatureVector.objects.bulk_create(feature_vectors)
                        feature_vectors = []
                except Exception:
                    feature_vector = None
            except Exception:
                feature_vector = None
                exact_name = None

            band_gap = row[mapping["BandGap"]]
            data = {
                "name": row[mapping["Name"]],
                "options": row[mapping["Options"]],
                "homo": row[mapping["HOMO"]],
                "lumo": row[mapping["LUMO"]],
                "homo_orbital": row[mapping["HomoOrbital"]],
                "dipole": row[mapping["Dipole"]],
                "energy": row[mapping["Energy"]],
                "band_gap": band_gap if band_gap != '---' else None,
                "exact_name": exact_name,
                "created": now,
            }
            point = DataPoint(**data)
            point.clean_fields()
            points.append(point)
            if len(points) > 50:
                DataPoint.objects.bulk_create(points)
                points = []
            if feature_vector is not None:
                idxs.add(count)
            count += 1
        except Exception:
            pass
    # Flush the remaining rows, then wire up the many-to-many links.
    DataPoint.objects.bulk_create(points)
    FeatureVector.objects.bulk_create(feature_vectors)

    Through = DataPoint.vectors.through
    temp = DataPoint.objects.filter(created=now).values_list(
        "pk", "exact_name")
    temp2 = FeatureVector.objects.all().values_list("exact_name", "pk")
    groups = dict(temp2)
    final = []
    for i, (pk, name) in enumerate(temp):
        if i in idxs:
            final.append(
                Through(datapoint_id=pk, featurevector_id=groups[name]))
            if len(final) > 200:
                Through.objects.bulk_create(final)
                final = []
    Through.objects.bulk_create(final)
    return count
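# Usage sketch ("molecules.csv" is a placeholder file name): main() expects an
# open text file whose header row contains the column names that get_mapping()
# looks up ("Name", "Options", "HOMO", "LUMO", ...).
with open("molecules.csv", newline="") as csvfile:
    imported = main(csvfile)
    print(f"Imported {imported} data points")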
def get_latest_data_point(self, stocks: List[str],
                          current_time: datetime) -> Dict[str, DataPoint]:
    """Return the latest DataPoint at or before current_time for each stock."""
    pandas_data_frame = pandas.DataFrame(
        columns=['Datetime', 'Open', 'High', 'Low', 'Close',
                 'AdjustedClose', 'Volume', 'Symbol'])
    download_list = []
    stocks_dict = {}
    start_time = current_time.strftime("%Y-%m-%d")
    end_time = (current_time + timedelta(days=1)).strftime("%Y-%m-%d")
    historical_data_path = "./datasets/historical_data/"
    folder_path = historical_data_path + start_time + "/"
    for stock in stocks:
        try:
            Path(folder_path).mkdir(parents=True, exist_ok=True)
        except OSError:
            print(f'Creation of the directory {folder_path} failed')
        stock_file = Path(folder_path + stock + ".csv")
        # Use the cached CSV only when the requested time lies more than 24
        # hours in the past, i.e. the day's data can no longer change.
        if stock_file.is_file() and \
                current_time < datetime.now() - timedelta(hours=24):
            stock_data = pandas.read_csv(str(stock_file))
            # The cached CSV stores full timestamps.
            stock_data['Datetime'] = pandas.to_datetime(
                stock_data['Datetime'], format='%Y-%m-%d %H:%M:%S')
            pandas_data_frame = pandas.concat(
                [pandas_data_frame, stock_data])
        else:
            # Not cached (or too recent to trust the cache): download it.
            download_list.append(stock)

    if len(download_list) > 0:
        stocks_data = yf.download(download_list, start=start_time,
                                  end=end_time, interval="1m")
        if len(stocks_data) > 0:
            for stock in download_list:
                # yfinance returns a flat frame for one ticker and a
                # column-per-ticker frame for several.
                stock_data = stocks_data if len(
                    download_list) == 1 else stocks_data[stock]
                stock_data = stock_data.rename(
                    columns={"Adj Close": "AdjustedClose"})
                stock_data = stock_data.reset_index()
                stock_data.dropna(inplace=True)
                # Strip the "+HH:MM" timezone suffix before parsing.
                stock_data["Datetime"] = stock_data["Datetime"].astype(
                    str).str[:-6].astype('datetime64[ns]')
                stock_data["Volume"] = stock_data["Volume"].astype(float)
                stock_data["Symbol"] = stock
                # Cache the CSV only once the day's data is final.
                if current_time < datetime.now() - timedelta(hours=24):
                    stock_file = Path(folder_path + stock + ".csv")
                    stock_data.to_csv(path_or_buf=stock_file, index=False)
                pandas_data_frame = pandas.concat(
                    [pandas_data_frame, stock_data])

    spark_data_frame_for_stock = self.spark.createDataFrame(
        pandas_data_frame, self.schema)

    for stock in stocks:
        last_point_row = spark_data_frame_for_stock \
            .where(spark_data_frame_for_stock.Datetime
                   <= current_time.strftime("%Y-%m-%d %H:%M:%S")) \
            .where(spark_data_frame_for_stock.Symbol == stock) \
            .sort("Datetime", ascending=False) \
            .limit(1) \
            .first()
        data_point = DataPoint(last_point_row.Open, last_point_row.Close,
                               last_point_row.High, last_point_row.Low,
                               last_point_row.Volume, last_point_row.Datetime)
        stocks_dict[stock] = data_point
    return stocks_dict
def get_historical_data(self, stock: str, current_time: datetime,
                        number_of_days: int = 10) -> List[DataPoint]:
    """Return minute-level DataPoints for the last number_of_days days,
    up to current_time."""
    spark_data_frame_for_stock = self.spark.createDataFrame([], self.schema)
    for i in range(number_of_days + 1):
        start_time = (current_time - timedelta(days=i)).strftime("%Y-%m-%d")
        end_time = (current_time - timedelta(days=i - 1)).strftime("%Y-%m-%d")
        historical_data_path = "./datasets/historical_data/"
        folder_path = historical_data_path + start_time + "/"
        try:
            Path(folder_path).mkdir(parents=True, exist_ok=True)
        except OSError:
            print(f'Creation of the directory {folder_path} failed')
        stock_file = Path(folder_path + stock + ".csv")
        if stock_file.is_file():
            # If the stock data is already downloaded, just load it.
            stock_data_spark_df = self.spark.read \
                .csv(str(stock_file), schema=self.schema,
                     timestampFormat="yyyy-MM-dd HH:mm:ss", header=True)
            spark_data_frame_for_stock = spark_data_frame_for_stock.union(
                stock_data_spark_df)
        else:
            # Download the day's data if it is not cached yet.
            stock_data = yf.download(stock, start=start_time, end=end_time,
                                     interval="1m")
            if len(stock_data) < 1:
                print(f'stock data not found on yahoo finance: {stock}')
                continue
            stock_data = stock_data.rename(
                columns={"Adj Close": "AdjustedClose"})
            stock_data = stock_data.reset_index()
            stock_data.dropna(inplace=True)
            # Strip the "+HH:MM" timezone suffix before parsing.
            stock_data["Datetime"] = stock_data["Datetime"].astype(
                str).str[:-6].astype('datetime64[ns]')
            stock_data["Volume"] = stock_data["Volume"].astype(float)
            stock_data["Symbol"] = stock
            # Cache the CSV only once the day's data is final.
            if current_time - timedelta(days=i) < \
                    datetime.now() - timedelta(days=1):
                stock_data.to_csv(path_or_buf=stock_file, index=False)
            stock_data_spark_df = self.spark.createDataFrame(
                stock_data, self.schema)
            spark_data_frame_for_stock = spark_data_frame_for_stock.union(
                stock_data_spark_df)

    spark_data_frame_for_stock_sorted = spark_data_frame_for_stock \
        .where(spark_data_frame_for_stock.Datetime
               <= current_time.strftime("%Y-%m-%d %H:%M:%S")) \
        .sort("Datetime") \
        .collect()
    list_of_data_points = [
        DataPoint(row.Open, row.Close, row.High, row.Low, row.Volume,
                  row.Datetime)
        for row in spark_data_frame_for_stock_sorted
    ]
    return list_of_data_points
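# Usage sketch for the two methods above. The enclosing class name
# (StockDataProvider) is an assumption; the source only shows the methods,
# which rely on self.spark (a SparkSession) and self.schema.
from datetime import datetime

provider = StockDataProvider()
as_of = datetime(2021, 3, 1, 15, 30)
# Most recent data point at or before `as_of` for each symbol:
latest = provider.get_latest_data_point(["AAPL", "MSFT"], as_of)
# Minute-level points for the preceding five days of one symbol:
history = provider.get_historical_data("AAPL", as_of, number_of_days=5)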