def create_examples(data, bert_client, training=True, label2int=None, class_weight=None):
    """Yield one ``Example`` per row of ``data`` with both titles encoded.

    Args:
        data: pd.DataFrame with ``title1_en`` and ``title2_en`` columns, plus
            a ``label`` column when ``training`` is true or an ``id`` column
            otherwise. Any index is accepted; rows are read by position.
        bert_client: object whose ``encode(list)`` result can be indexed by
            row position to obtain that row's float vector.
        training: when true, attach ``label`` and ``class_weight`` features;
            otherwise attach the row ``id``.
        label2int: dict mapping raw label values to integer class ids
            (needed when ``training`` is true).
        class_weight: sequence indexed by integer class id (needed when
            ``training`` is true).

    Yields:
        ``Example`` protos holding ``A_encoded`` / ``B_encoded`` float
        features and either ``label`` + ``class_weight`` or ``id``.
    """
    # No rows: stop before calling the encoder with an empty batch.
    if len(data) == 0:
        return

    A_encoded = bert_client.encode(data['title1_en'].tolist())
    B_encoded = bert_client.encode(data['title2_en'].tolist())

    # The encodings are positional, so the per-row column must be read
    # positionally too. Label-based lookup (index[0] + i) only matches when
    # the index is a contiguous integer range, which does not hold after
    # filtering, shuffling or splitting the frame.
    row_values = data['label' if training else 'id'].tolist()

    for i, row_value in enumerate(row_values):
        feature = {
            'A_encoded': Feature(float_list=FloatList(value=A_encoded[i])),
            'B_encoded': Feature(float_list=FloatList(value=B_encoded[i])),
        }
        if training:
            label = label2int[row_value]
            feature['label'] = Feature(int64_list=Int64List(value=[label]))
            feature['class_weight'] = Feature(
                float_list=FloatList(value=[class_weight[label]]))
        else:
            feature['id'] = Feature(int64_list=Int64List(value=[row_value]))
        yield Example(features=Features(feature=feature))
def convert_to_example(
    adj,
    feature,
    label_data=None,
    label_mask=None,
):
    """Serialize graph data into a single ``Example`` and return its bytes.

    Both matrices are stored in coordinate form: the row indices, column
    indices and values of their non-zero entries, plus the entry count.

    Args:
        adj: 2-D adjacency array.
        feature: 2-D feature matrix (anything ``np.array`` accepts).
        label_data: optional label array; NaNs are replaced via
            ``np.nan_to_num`` and values are cast to int.
        label_mask: mask array, cast to int. Required whenever
            ``label_data`` is given.

    Returns:
        The serialized ``Example`` as returned by ``SerializeToString()``.

    Raises:
        ValueError: if ``label_data`` is given without ``label_mask``.
    """
    # Fail early with a clear message instead of an AttributeError on None.
    if label_data is not None and label_mask is None:
        raise ValueError("label_mask is required when label_data is given")

    adj_row, adj_col = np.nonzero(adj)
    adj_values = adj[adj_row, adj_col]
    adj_elem_len = len(adj_row)

    # Per-entry degree: 0 for diagonal entries (self loops), otherwise the
    # axis-0 sum of `adj` looked up at the entry's row index.
    degrees = np.sum(adj, 0)
    adj_degrees = [
        0 if ar == ac else int(degrees[ar])
        for ar, ac in zip(adj_row, adj_col)
    ]

    # Distinct names for the matrix and the proto field dict, so the
    # parameter is never rebound and 'size' clearly reads the matrix shape.
    feature_matrix = np.array(feature)
    feature_row, feature_col = np.nonzero(feature_matrix)
    feature_values = feature_matrix[feature_row, feature_col]
    feature_elem_len = len(feature_row)

    fields = {
        'adj_row': Feature(int64_list=Int64List(value=list(adj_row))),
        'adj_column': Feature(int64_list=Int64List(value=list(adj_col))),
        'adj_values': Feature(float_list=FloatList(value=list(adj_values))),
        'adj_elem_len': Feature(int64_list=Int64List(value=[adj_elem_len])),
        'adj_degrees': Feature(int64_list=Int64List(value=adj_degrees)),
        'feature_row': Feature(int64_list=Int64List(value=list(feature_row))),
        'feature_column': Feature(int64_list=Int64List(value=list(feature_col))),
        'feature_values': Feature(float_list=FloatList(value=list(feature_values))),
        'feature_elem_len': Feature(int64_list=Int64List(value=[feature_elem_len])),
        'size': Feature(int64_list=Int64List(value=list(feature_matrix.shape))),
    }

    if label_data is not None:
        labels = np.nan_to_num(label_data)
        fields['label'] = Feature(int64_list=Int64List(
            value=labels.astype(int)))
        # No trailing comma here: the value must be a Feature, not a tuple,
        # or building Features below fails.
        fields['mask_label'] = Feature(int64_list=Int64List(
            value=np.asarray(label_mask).astype(int)))

    ex = Example(features=Features(feature=fields))
    return ex.SerializeToString()
def write_to_tfrecords(adj, feature, label_data, label_mask, tfrname):
    """Write graph data to ``tfrname`` as one serialized ``Example``.

    The adjacency and feature matrices are stored in coordinate form (row
    indices, column indices and values of the non-zero entries, plus the
    entry count). ``label_data`` and ``label_mask`` are passed to
    ``Int64List`` unchanged, and the feature matrix shape is stored under
    ``size``.
    """
    def _int64(values):
        # Wrap an iterable of ints as an int64 Feature.
        return Feature(int64_list=Int64List(value=values))

    def _floats(values):
        # Wrap an iterable of floats as a float Feature.
        return Feature(float_list=FloatList(value=values))

    rows, cols = np.nonzero(adj)
    nonzero_adj = adj[rows, cols]
    adj_count = len(rows)

    dense = np.array(feature)
    f_rows, f_cols = np.nonzero(dense)
    nonzero_feat = dense[f_rows, f_cols]
    feat_count = len(f_rows)

    fields = {
        'label': _int64(label_data),
        'mask_label': _int64(label_mask),
        'adj_row': _int64(list(rows)),
        'adj_column': _int64(list(cols)),
        'adj_values': _floats(list(nonzero_adj)),
        'adj_elem_len': _int64([adj_count]),
        'feature_row': _int64(list(f_rows)),
        'feature_column': _int64(list(f_cols)),
        'feature_values': _floats(list(nonzero_feat)),
        'feature_elem_len': _int64([feat_count]),
        'size': _int64(list(dense.shape)),
    }
    record = Example(features=Features(feature=fields))

    with TFRecordWriter(tfrname) as single_writer:
        single_writer.write(record.SerializeToString())
def get_cycle_example(cell_value, summary_idx, cycle_idx, scaling_factors):
    """Build the ``Example`` for one charging cycle of a cell.

    Selects the columns that go into the tfrecords file and divides each
    value by its entry in ``scaling_factors``. Summary features are scalars
    read at ``summary_idx``; detail features are the arrays stored for
    ``cycle_idx``.
    """
    def _summary(name):
        # Scalars have to be wrapped in a list for FloatList.
        return [cell_value["summary"][name][summary_idx] / scaling_factors[name]]

    def _detail(name):
        # Detail features are already arrays; scale them as a whole.
        return cell_value["cycles"][cycle_idx][name] / scaling_factors[name]

    def _float_feature(values):
        return Feature(float_list=FloatList(value=values))

    # Summary features.
    internal_resistance = _summary(cst.INTERNAL_RESISTANCE_NAME)
    discharge_capacity = _summary(cst.QD_NAME)
    remaining_cycles = _summary(cst.REMAINING_CYCLES_NAME)
    discharge_time = _summary(cst.DISCHARGE_TIME_NAME)
    # The current cycle is on the same scale as the remaining cycles, so it
    # reuses that scaling factor.
    current_cycle = [
        float(cycle_idx) / scaling_factors[cst.REMAINING_CYCLES_NAME]
    ]

    # Detail features.
    qdlin = _detail(cst.QDLIN_NAME)
    tdlin = _detail(cst.TDLIN_NAME)

    fields = {
        cst.INTERNAL_RESISTANCE_NAME: _float_feature(internal_resistance),
        cst.QD_NAME: _float_feature(discharge_capacity),
        cst.REMAINING_CYCLES_NAME: _float_feature(remaining_cycles),
        cst.DISCHARGE_TIME_NAME: _float_feature(discharge_time),
        cst.QDLIN_NAME: _float_feature(qdlin),
        cst.TDLIN_NAME: _float_feature(tdlin),
        cst.CURRENT_CYCLE_NAME: _float_feature(current_cycle),
    }
    return Example(features=Features(feature=fields))