def _generate_examples(self, gloss_path: str, text_path: str):
    """Yield one example per pair of aligned lines from the two files.

    The gloss file and the text file are iterated in lockstep. Each example
    is keyed by its zero-based position and holds the raw lines as read (no
    stripping is applied here). Iteration stops when either file runs out.
    """
    with GFile(gloss_path, "r") as gloss_lines, GFile(text_path, "r") as text_lines:
        index = 0
        for gloss, text in zip(gloss_lines, text_lines):
            yield index, {"gloss": gloss, "text": text}
            index += 1
def get_embedding_index(self, glove_path=None):
    """Load a whitespace-separated embedding file into a ``{word: vector}`` dict.

    Args:
        glove_path: Path of the embedding file. Falls back to
            ``self.glove_path`` when ``None``. Paths containing ``"gs://"``
            are opened through ``GFile``; everything else through ``open``.

    Returns:
        A dict mapping the first token of every non-blank line to a float32
        NumPy array built from the remaining tokens of that line.
    """
    if glove_path is None:
        glove_path = self.glove_path

    # Use GFile to read from Cloud Storage, the builtin open() otherwise.
    if "gs://" in glove_path:
        source = GFile(glove_path, mode='r')
    else:
        # Explicit UTF-8 keeps local reads consistent with the GFile branch
        # instead of depending on the platform's default encoding.
        source = open(glove_path, encoding='utf-8')

    embeddings_index = {}
    # The context manager closes the handle even if a line fails to parse.
    with source as f:
        for line in f:
            values = line.split()
            if not values:
                # Blank lines (e.g. a trailing newline) carry no embedding.
                continue
            embeddings_index[values[0]] = np.asarray(values[1:], dtype='float32')
    return embeddings_index
def load_data(self):
    """Build the character-level dataset from the text at ``self.file_path``.

    The file is decoded as UTF-8, every distinct character becomes a
    vocabulary entry, and the id sequence is cut into chunks of
    ``C.SEQUENCE_LENGTH + 1`` ids that are batched, mapped through
    ``self._to_inputs_and_targets``, shuffled and prefetched.

    Side effects:
        Sets ``self.ids_to_chars_layer`` to the inverse lookup layer.

    Returns:
        The resulting dataset.
    """
    # Read inside a context manager so the handle is closed deterministically.
    with GFile(self.file_path, 'rb') as text_file:
        data = text_file.read().decode(encoding='UTF-8')

    # Get a list of the unique characters in the text
    vocab = sorted(set(data))

    chars_to_ids = StringLookup(vocabulary=vocab)
    self.ids_to_chars_layer = StringLookup(
        vocabulary=chars_to_ids.get_vocabulary(), invert=True)

    # Split the entire text by character
    chars = unicode_split(data, 'UTF-8')
    ids_of_chars = chars_to_ids(chars)

    # Group characters to form sequences (+1 since the targets are shifted by one)
    sequences_ds = Dataset.from_tensor_slices(ids_of_chars)
    sequences_ds = sequences_ds.batch(C.SEQUENCE_LENGTH + 1)

    # Batch the sequences
    ds = sequences_ds.padded_batch(C.BATCH_SIZE)
    ds = ds.map(self._to_inputs_and_targets,
                num_parallel_calls=tf.data.experimental.AUTOTUNE)
    ds = ds.shuffle(C.BUFFER_SIZE)
    ds = ds.prefetch(tf.data.experimental.AUTOTUNE)
    return ds
def _get_unique_documents(self, tokenizer):
    """Provides an iterator over all documents, skipping duplicated text.

    Records are read one after another from ``self.path``. A document is
    skipped when ``remove_whitespace_and_parse`` returns nothing for its text
    or returns a value that was already seen. The same ``Document`` instance
    is re-parsed for every record; what is yielded is the result of
    ``self._convert_token_boundaries_to_codeunits`` on it.
    """
    document = proto_document.Document()
    characters_per_sentence = set()
    with GFile(self.path, "rb") as src_file:
        msg_buf = src_file.read(_MAX_BINPROTO_PREFIX_LENGTH)
        while msg_buf:
            # Get the message length. Decoding starts at offset 1, so the
            # first byte of every record is skipped (presumably the field
            # tag -- confirm against the writer).
            msg_len, new_pos = _DecodeVarint32(msg_buf, 1)
            msg_buf = msg_buf[new_pos:]
            # Read the rest of the message. A short message may already be
            # fully buffered; never issue a negative-sized read in that case.
            missing = msg_len - len(msg_buf)
            if missing > 0:
                msg_buf += src_file.read(missing)
            # Parse exactly this message, not any bytes of the next record
            # that are already sitting in the buffer.
            document.ParseFromString(msg_buf[:msg_len])
            msg_buf = msg_buf[msg_len:]
            # Read the length prefix for the next message.
            msg_buf += src_file.read(_MAX_BINPROTO_PREFIX_LENGTH)
            characters = remove_whitespace_and_parse(document.text, tokenizer)
            if characters in characters_per_sentence or len(characters) == 0:
                continue
            characters_per_sentence.add(characters)
            yield self._convert_token_boundaries_to_codeunits(document)
def save_architecture(self, log_dir):
    """Write ``self.to_json()`` to ``style_transfer_architecture.json`` in ``log_dir``.

    ``log_dir`` and any missing parent directories are created first.
    """
    # Ensure the log_dir exists
    target_dir = pathlib.Path(log_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    architecture_path = os.path.join(log_dir, 'style_transfer_architecture.json')
    with GFile(architecture_path, 'w') as out:
        out.write(self.to_json())
def _split_generators(self, dl_manager: tfds.download.DownloadManager):
    """Download the index (and optional videos / poses) and build the splits.

    Returns:
        A dict with the keys ``"train"``, ``"validation"`` and ``"test"``,
        each holding the generator created by ``self._generate_examples``.
    """
    config = self._builder_config
    index_path = dl_manager.download(_INDEX_URL)

    with GFile(index_path, "r") as index_file:
        data = json.load(index_file)

    # Download videos and update paths
    if config.include_video:
        paths = self._download_videos(data, dl_manager)
        for datum in data:
            for instance in datum["instances"]:
                video_id = instance["video_id"]
                if video_id in paths:
                    instance["video"] = paths[video_id]
                else:
                    # Instances without a downloaded video are kept, with no path
                    instance["video"] = None

    pose_path = None
    if config.include_pose == "openpose":
        pose_path = dl_manager.download_and_extract(
            _POSE_URLS[config.include_pose])

    splits = {}
    for split_name, index_split in (("train", "train"),
                                    ("validation", "val"),
                                    ("test", "test")):
        splits[split_name] = self._generate_examples(data, pose_path, index_split)
    return splits
def imwrite(f, a, fmt=None):
    """Save the array ``a`` as an image.

    Args:
        f: Either a path string or an already opened writable file object.
            For a path, the format is taken from the text after the last
            ``.`` (lower-cased, ``jpg`` mapped to ``jpeg``) and overrides
            ``fmt``.
        a: Image data; converted with ``np.asarray`` and then ``np2pil``.
        fmt: Image format passed to ``save`` when ``f`` is a file object.
    """
    a = np.asarray(a)
    if isinstance(f, str):
        fmt = f.rsplit('.', 1)[-1].lower()
        if fmt == 'jpg':
            fmt = 'jpeg'
        # This function opened the handle, so it must also close it;
        # otherwise buffered bytes may never reach the destination.
        with GFile(f, mode='wb') as out:
            np2pil(a).save(out, fmt, quality=95)
    else:
        # Caller-owned file object: write to it but leave it open.
        np2pil(a).save(f, fmt, quality=95)
def _get_unique_text_and_characters(self, tokenizer):
    """Yield ``(text, characters)`` for each line of ``self.path`` that is new.

    ``characters`` is the result of ``remove_whitespace_and_parse`` for the
    line. Lines whose result is empty, or equal to one produced by an earlier
    line, are skipped.
    """
    characters_per_sentence = set()
    with GFile(self.path, "r") as file:
        for text in file:
            characters = remove_whitespace_and_parse(text, tokenizer)
            # The comparison with 0 applies to the length only. The previous
            # form ``(seen or len(...)) == 0`` evaluated ``True == 0`` for
            # duplicates and therefore never skipped them.
            if characters in characters_per_sentence or len(characters) == 0:
                continue
            characters_per_sentence.add(characters)
            yield text, characters
def init_feature_face(self):
    """Import the graph stored at ``self.ff_pb_path`` into the face-feature session.

    After the import, the tensors ``input:0``, ``phase_train:0`` and
    ``embeddings:0`` are looked up and stored on the instance.
    """
    session = self.face_feature_sess
    with session.as_default(), session.graph.as_default():
        with GFile(self.ff_pb_path, 'rb') as pb_file:
            graph_def = session.graph_def
            graph_def.ParseFromString(pb_file.read())
            tf.import_graph_def(graph_def, name='')

            graph = get_default_graph()
            self.ff_images_placeholder = graph.get_tensor_by_name("input:0")
            self.ff_train_placeholder = graph.get_tensor_by_name("phase_train:0")
            self.ff_embeddings = graph.get_tensor_by_name("embeddings:0")
def _save_predictions_as_binproto(
        output_directory, file_name,
        characterwise_predicted_label_names_per_sentence, words_per_sentence):
    """Saves the hypotheses to a .binproto file.

    One document is added per (label names, words) pair. All documents are
    collected in a single ``Documents`` message that is serialized and
    written to ``output_directory/file_name`` at the end.

    Args:
        output_directory: Directory of the output file.
        file_name: Name of the output file inside ``output_directory``.
        characterwise_predicted_label_names_per_sentence: Per sentence, one
            label name per character.
        words_per_sentence: Per sentence, the words of that sentence.
    """

    def _add_labeled_span(label_start_char, label_end_char, label, words,
                          proto_labeled_spans):
        """Adds a new labeled span to the list."""
        # The character range is converted to word indices by the helper.
        label_start, label_end = (_get_word_indices_from_character_indices(
            words, label_start_char, label_end_char))
        proto_labeled_spans.labeled_span.add(token_start=label_start,
                                             token_end=label_end,
                                             label=label)

    with GFile(os.path.join(output_directory, file_name), "wb") as output_file:
        documents = proto_documents.Documents()
        for (characterwise_predicted_label_names,
             words) in zip(characterwise_predicted_label_names_per_sentence,
                           words_per_sentence):
            document = documents.documents.add()
            document.text = ""
            # Running offset of the next token, counted in UTF-8 bytes.
            token_start = 0
            for word in words:
                # A separating space is only inserted before words that
                # contain at least one alphanumeric character, and never at
                # the very beginning of the text.
                if len(document.text) > 0 and any(c.isalnum() for c in word):
                    document.text += " "
                    token_start += 1
                document.text += word
                num_bytes = len(bytes(word, "utf-8"))
                # ``end`` is inclusive: it points at the token's last byte.
                document.token.add(start=token_start,
                                   end=token_start + num_bytes - 1,
                                   word=word)
                token_start += num_bytes
            # ``label`` is the currently open span (None when there is none);
            # ``label_start_char`` is the character index where it began.
            label_start_char = 0
            label = None
            proto_labeled_spans = document.labeled_spans["lucid"]
            for i, label_name in enumerate(
                    characterwise_predicted_label_names):
                if _is_label_type(label_name, LabelType.OUTSIDE):
                    # An outside label closes the open span, if any.
                    if label is not None:
                        _add_labeled_span(label_start_char, i - 1, label,
                                          words, proto_labeled_spans)
                        label = None
                elif _is_label_type(label_name, LabelType.BEGINNING):
                    # A beginning label closes the open span and starts a
                    # new one at this character.
                    if label is not None:
                        _add_labeled_span(label_start_char, i - 1, label,
                                          words, proto_labeled_spans)
                    label = label_name[len("B-"):]
                    label_start_char = i
                else:
                    # Anything else must continue the currently open span.
                    assert label_name == "I-%s" % label
            # A span still open at the end runs to the last character.
            if label is not None:
                _add_labeled_span(label_start_char,
                                  len(characterwise_predicted_label_names) - 1,
                                  label, words, proto_labeled_spans)
        output_file.write(documents.SerializeToString())
def init_face_landmark_tf(self):
    """Import the graph stored at ``self.landmark_pb_path`` into the landmark session.

    The tensor ``fully_connected_9/Relu:0`` is stored as
    ``self.face_landmark_tensor`` once the import is done.
    """
    session = self.face_landmark_sess
    with session.as_default(), session.graph.as_default():
        graph_def = session.graph_def
        with GFile(self.landmark_pb_path, 'rb') as pb_file:
            graph_def.ParseFromString(pb_file.read())
            tf.import_graph_def(graph_def, name='')

            graph = get_default_graph()
            self.face_landmark_tensor = graph.get_tensor_by_name(
                "fully_connected_9/Relu:0")
def createMultiModelMaximum(max_seq_len, bert_ckpt_file, bert_config_file, NUM_CLASS):
    """Build a three-input classifier that merges BERT, ResNet50 and InceptionV3.

    Each branch ends in a 768-unit dense layer; the three branch outputs are
    combined with an element-wise ``Maximum`` and passed through a dense
    stack ending in a ``NUM_CLASS``-way softmax. A plot of the model is
    written to ``multiple_inputs_text.png``.

    Args:
        max_seq_len: Length of the token-id input of the BERT branch.
        bert_ckpt_file: Checkpoint passed to ``load_stock_weights``.
        bert_config_file: JSON config read to create the BERT layer.
        NUM_CLASS: Number of units of the softmax output layer.

    Returns:
        The tuple ``(model, 17)``.
    """

    def _image_head(features):
        # pooling -> dropout -> 2048 -> dropout -> 768
        pooled = GlobalAveragePooling2D()(features)
        pooled = Dropout(0.5)(pooled)
        hidden = Dense(2048)(pooled)
        hidden = Dropout(0.5)(hidden)
        return Dense(768)(hidden)

    # text model
    with GFile(bert_config_file, "r") as config_reader:
        stock_config = StockBertConfig.from_json_string(config_reader.read())
    bert_params = map_stock_config_to_params(stock_config)
    bert_params.adapter_size = None
    bert_layer = BertModelLayer.from_params(bert_params, name="bert")

    bert_in = Input(shape=(max_seq_len, ), dtype='int32', name="input_ids_bert")
    sequence_out = bert_layer(bert_in)
    cls_out = Dropout(0.5)(Lambda(lambda seq: seq[:, 0, :])(sequence_out))
    bert_out = Dense(units=768, activation="tanh")(cls_out)  # 768 before
    load_stock_weights(bert_layer, bert_ckpt_file)

    # image models:
    inceptionv3 = InceptionV3(weights='imagenet', include_top=False)
    resnet50 = ResNet50(weights='imagenet', include_top=False)
    res_out = _image_head(resnet50.output)
    inc_out = _image_head(inceptionv3.output)

    # merge = Concatenate()([res_out, inc_out, bert_out])
    merged = Maximum()([res_out, inc_out, bert_out])

    # remaining layers
    x = merged
    for units in (2048, 1024, 512):
        x = Dense(units)(x)
        x = Dropout(0.5)(x)
    output = Dense(NUM_CLASS, activation='softmax', name='output_layer')(x)

    model = Model(inputs=[resnet50.input, inceptionv3.input, bert_in],
                  outputs=output)
    plot_model(model,
               to_file='multiple_inputs_text.png',
               show_shapes=True,
               dpi=600,
               expand_nested=False)
    return model, 17
def _generate_examples(self, videos_path: Union[str, None], poses_path: Union[str, None], labels_path: Union[str, None]): """Yields examples.""" if labels_path is not None: with GFile(labels_path, "r") as labels_file: labels = { sample_id: int(label_id) for sample_id, label_id in csv.reader(labels_file) } else: labels = None if videos_path is not None: samples = { tuple(f.split("_")[:2]) for f in os.listdir(videos_path) } elif poses_path is not None: samples = { tuple(f.split(".")[0].split("_")) for f in os.listdir(poses_path) } elif labels_path is not None: samples = {tuple(k.split("_")) for k in labels.keys()} else: raise Exception("Found no samples to generate") for signer, sample in samples: datum = dict({ "id": signer + "_" + sample, "signer": int(signer[6:]), "sample": int(sample[6:]) }) datum["gloss_id"] = labels[ datum["id"]] if labels is not None else -1 if videos_path is not None: datum[ "fps"] = self._builder_config.fps if self._builder_config.fps is not None else 30 datum["video"] = os.path.join(videos_path, datum["id"] + "_color.mp4") datum["depth_video"] = os.path.join(videos_path, datum["id"] + "_depth.mp4") if poses_path is not None: datum["pose"] = os.path.join(poses_path, datum["id"] + ".pose") yield datum["id"], datum
def init_face_attribute(self):
    """Import the graph stored at ``self.attribute_pb_path`` into the attribute session.

    Afterwards the four ``ArgMax`` output tensors and the ``Placeholder:0``
    input tensor are looked up and stored on the instance.
    """
    # Instance attribute -> name of the tensor it is bound to
    tensor_bindings = (
        ("pred_eyeglasses", "ArgMax:0"),
        ("pred_young", "ArgMax_1:0"),
        ("pred_male", "ArgMax_2:0"),
        ("pred_smiling", "ArgMax_3:0"),
        ("face_attribute_image_tensor", "Placeholder:0"),
    )

    session = self.face_attribute_sess
    with session.as_default(), session.graph.as_default():
        graph_def = session.graph_def
        with GFile(self.attribute_pb_path, 'rb') as pb_file:
            graph_def.ParseFromString(pb_file.read())
            tf.import_graph_def(graph_def, name='')

            graph = get_default_graph()
            for attribute, tensor_name in tensor_bindings:
                setattr(self, attribute, graph.get_tensor_by_name(tensor_name))
def imread(url, max_size=None, mode=None):
    """Read an image and return it as a float32 array scaled by 1/255.

    Args:
        url: An ``http:``/``https:`` URL (fetched with ``requests``), any
            other path string (opened with ``GFile``), or an already opened
            file object.
        max_size: If given, the image is shrunk in place with ``thumbnail``
            so that neither side exceeds this value.
        mode: If given, the image is converted to this PIL mode.

    Returns:
        ``np.float32(img) / 255.0``.
    """
    owned_handle = None
    if isinstance(url, str):
        if url.startswith(('http:', 'https:')):
            r = requests.get(url)
            f = io.BytesIO(r.content)
        else:
            # Remember the handle we open ourselves so it can be closed below.
            f = owned_handle = GFile(url, mode='rb')
    else:
        f = url

    try:
        img = PIL.Image.open(f)
        if max_size is not None:
            # LANCZOS is the same filter as the former ANTIALIAS alias,
            # which newer Pillow releases no longer provide.
            img.thumbnail((max_size, max_size), PIL.Image.LANCZOS)
        if mode is not None:
            img = img.convert(mode)
        # The array conversion forces the pixel data to be decoded, so the
        # source handle can safely be closed afterwards.
        return np.float32(img) / 255.0
    finally:
        if owned_handle is not None:
            owned_handle.close()
def load(cls, log_dir=None, epoch=None):
    """Create a model, or restore one previously saved under ``log_dir``.

    Args:
        log_dir: Directory holding ``style_transfer_architecture.json``, the
            ``encoder`` model and the ``weights`` checkpoints. When it is
            falsy or holds no architecture file, a fresh ``cls()`` is built
            (and saved into ``log_dir`` if one was given).
        epoch: Checkpoint to load (``weights/epoch_<epoch>``). When ``None``
            the most recently modified entry of ``weights`` is used, if any.

    Returns:
        The model, or ``None`` when ``epoch`` was given but its checkpoint
        directory does not exist.
    """
    model_found = bool(log_dir) and pathlib.Path(
        os.path.join(log_dir, 'style_transfer_architecture.json')).is_file()

    # If there isn't already a model create one from scratch and save it
    if not model_found:
        model = cls()
        if log_dir:
            model.save_architecture(log_dir)
            model.save_encoder(os.path.join(log_dir, 'encoder'))
        return model

    # Load the model's architecture
    with tf.keras.utils.custom_object_scope({
            'StyleTransfer': cls,
            'Conv2DReflectivePadding': Conv2DReflectivePadding
    }):
        # Read through a context manager so the handle does not leak.
        with GFile(os.path.join(log_dir, 'style_transfer_architecture.json'),
                   'r') as architecture_file:
            saved_json = architecture_file.read()
        model = tf.keras.models.model_from_json(saved_json)
        model.encoder = tf.keras.models.load_model(
            os.path.join(log_dir, 'encoder'))

    # If an epoch was provided, load the model at that epoch
    if epoch is not None:
        epoch_path = os.path.join(log_dir, 'weights', f'epoch_{epoch}')
        if not pathlib.Path(epoch_path).is_dir():
            print(f"Epoch {epoch} doesn't exists")
            return None
        print('Loading Checkpoint:', epoch_path)
        model.decoder = tf.keras.models.load_model(epoch_path)
        model.decoder_compiled = True
    else:
        # Load the decoder's latest weights if there are any
        ckpts = glob.glob(os.path.join(log_dir, 'weights', '*'))
        if ckpts:
            latest_ckpt = max(ckpts, key=os.path.getmtime)
            print('Loading Checkpoint:', latest_ckpt)
            model.decoder = tf.keras.models.load_model(latest_ckpt)
            model.decoder_compiled = True
    return model
def init_detection_face_tf(self):
    """Import the graph stored at ``self.detect_pb_path`` into the detection session.

    ``self.detection_tensor_dict`` receives those of ``num_detections``,
    ``detection_boxes``, ``detection_scores`` and ``detection_classes`` whose
    ``:0`` output exists in the graph; ``self.detection_image_tensor`` is
    bound to ``image_tensor:0``.
    """
    session = self.face_detection_sess
    with session.as_default(), session.graph.as_default():
        face_detect_od_graph_def = session.graph_def
        with GFile(self.detect_pb_path, 'rb') as pb_file:
            face_detect_od_graph_def.ParseFromString(pb_file.read())
            tf.import_graph_def(face_detect_od_graph_def, name='')

        graph = get_default_graph()

        # Names of every output tensor present in the graph
        all_tensor_names = set()
        for op in graph.get_operations():
            for output in op.outputs:
                all_tensor_names.add(output.name)

        self.detection_tensor_dict = {}
        for key in ['num_detections', 'detection_boxes', 'detection_scores', 'detection_classes']:
            tensor_name = key + ':0'
            if tensor_name not in all_tensor_names:
                continue
            self.detection_tensor_dict[key] = graph.get_tensor_by_name(tensor_name)

        self.detection_image_tensor = graph.get_tensor_by_name('image_tensor:0')
def load(self, model_file: str):
    """Load a model

    Arguments:
        model_file {str} -- The local cached model files that can be loaded

    Side effects:
        Sets ``self.node_names`` to the names of all nodes of the loaded
        graph definition and ``self.sess`` to the new session.
    """
    sess = tf.compat.v1.Session()
    self.log.debug(f"Loading frozen graph {model_file} as TF Graph ...")
    with GFile(model_file, 'rb') as f:
        graph_def = tf.compat.v1.GraphDef()
        graph_def.ParseFromString(f.read())

    # ``as_default()`` only returns a context manager; it has to be entered
    # for the import to be directed at the session's graph.
    with sess.graph.as_default():
        tf.import_graph_def(graph_def, name='')

    self.node_names = {n.name for n in graph_def.node}
    self.log.debug("Graph loaded for the session.")
    self.sess = sess
def __init__(self, checkpoint_filename, input_name="images", output_name="features"):
    """Create a session and import the graph stored in ``checkpoint_filename``.

    The graph is imported under the ``net`` name scope. The tensors
    ``net/<input_name>:0`` and ``net/<output_name>:0`` become
    ``self.input_var`` and ``self.output_var``; the output must have rank 2
    and the input rank 4.
    """
    self.session = Session()

    with GFile(checkpoint_filename, "rb") as file_handle:
        serialized = file_handle.read()
    graph_def = GraphDef()
    graph_def.ParseFromString(serialized)
    import_graph_def(graph_def, name="net")

    graph = get_default_graph()
    self.input_var = graph.get_tensor_by_name("net/%s:0" % input_name)
    self.output_var = graph.get_tensor_by_name("net/%s:0" % output_name)

    output_shape = self.output_var.get_shape()
    input_shape = self.input_var.get_shape()
    assert len(output_shape) == 2
    assert len(input_shape) == 4

    # Last output dimension and all input dimensions after the first one
    self.feature_dim = output_shape.as_list()[-1]
    self.image_shape = input_shape.as_list()[1:]
def create_text_model(max_seq_len,
                      bert_ckpt_file,
                      bert_config_file,
                      NUM_CLASS,
                      overwriteLayerAndEmbeddingSize=False,
                      isPreTrained=False,
                      pathToBertModelWeights=None,
                      isTrainable=True):
    """Build a BERT classifier with a ``NUM_CLASS``-way softmax head.

    Args:
        max_seq_len: Length of the token-id input.
        bert_ckpt_file: Checkpoint for ``load_stock_weights`` (used only
            when ``isPreTrained`` is false).
        bert_config_file: JSON config read to create the BERT layer.
        NUM_CLASS: Number of units of the softmax output layer.
        overwriteLayerAndEmbeddingSize: When true, the config's
            ``max_position_embeddings`` is replaced by ``max_seq_len``.
        isPreTrained: When true, weights are loaded from
            ``pathToBertModelWeights`` instead of the stock checkpoint.
        pathToBertModelWeights: Weights file for the pre-trained case.
        isTrainable: Only consulted when ``isPreTrained`` is true; when
            false, every layer of the model is frozen.

    Returns:
        The model alone when ``isPreTrained`` is false, otherwise the tuple
        ``(model, 2)``. NOTE(review): the two branches return different
        shapes; callers must handle both.
    """
    with GFile(bert_config_file, "r") as config_reader:
        stock_config = StockBertConfig.from_json_string(config_reader.read())
    if overwriteLayerAndEmbeddingSize:
        stock_config.max_position_embeddings = max_seq_len

    bert_params = map_stock_config_to_params(stock_config)
    bert_params.adapter_size = None
    bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = Input(shape=(max_seq_len, ), dtype='int32', name="input_ids")
    bert_output = bert(input_ids)
    print("bert shape", bert_output.shape)

    # Keep only the first position of the sequence output
    first_token = Lambda(lambda seq: seq[:, 0, :],
                         name='bert_output_layer_768')(bert_output)
    first_token = Dropout(0.5)(first_token)
    output = Dense(NUM_CLASS, activation="softmax")(first_token)

    model_bert = Model(inputs=input_ids, outputs=output, name='BERT')
    model_bert.build(input_shape=(None, max_seq_len))

    if not isPreTrained:
        load_stock_weights(bert, bert_ckpt_file)
        return model_bert

    model_bert.load_weights(pathToBertModelWeights)
    if not isTrainable:
        for layer in model_bert.layers:
            layer.trainable = False
    return model_bert, 2
def get_labeled_text(self, tokenizer):
    """Provides an iterator over all labeled texts in the linkfragments.

    Each line is expected to be ``<text>\\t<label description>`` where the
    text marks the labeled part as ``{{{...}}}``. Lines whose text (braces
    removed) yields no characters are skipped.

    This cannot skip entries with duplicated text like similar methods in
    the other readers, because text may be duplicated if there are multiple
    labels. This is handled by the caller.
    """
    with GFile(self.path, "r") as file:
        for linkfragment in file:
            text, label_description = linkfragment.split("\t")
            prefix, remaining_text = text.split("{{{")
            labeled_text, suffix = remaining_text.split("}}}")

            # Map the textual description to one of the known labels
            label_description = label_description.strip()
            if label_description == LF_ADDRESS_LABEL:
                label = MAIN_LABEL_ADDRESS
            elif label_description == LF_TELEPHONE_LABEL:
                label = MAIN_LABEL_TELEPHONE
            else:
                label = LABEL_OUTSIDE

            complete_text = text.replace("{{{", "").replace("}}}", "").strip()
            characters = remove_whitespace_and_parse(complete_text, tokenizer)
            if len(characters) == 0:
                continue

            yield LabeledExample(prefix=prefix.strip(),
                                 selection=labeled_text.strip(),
                                 suffix=suffix.strip(),
                                 complete_text=complete_text,
                                 label=label)
def get_svg_ds(self):
    """Build a character-id dataset from ``datasets/svgs/simpleline.svg``.

    The file is decoded as UTF-8, split into characters and mapped to ids
    using a fixed five-character vocabulary. The ids are grouped into chunks
    of ``C.SEQUENCE_LENGTH`` and then into batches of ``C.BATCH_SIZE``.

    Side effects:
        Sets ``self.ids_to_chars_layer`` to the inverse lookup layer.

    Returns:
        The batched dataset.
    """
    # Read inside a context manager so the handle is closed deterministically.
    with GFile('datasets/svgs/simpleline.svg', 'rb') as svg_file:
        data = svg_file.read().decode(encoding='UTF-8')

    # Fixed vocabulary (not derived from the file contents)
    vocab = ['e', 'g', 'n', 'r', '\n']

    # Build the id to char lookup table
    chars_to_ids = StringLookup(vocabulary=vocab)
    self.ids_to_chars_layer = StringLookup(
        vocabulary=chars_to_ids.get_vocabulary(), invert=True)

    # Split the entire text by character
    chars = unicode_split(data, 'UTF-8')
    ids_of_chars = chars_to_ids(chars)

    # Group characters to form sequences
    svg_ds = Dataset.from_tensor_slices(ids_of_chars)
    svg_ds = svg_ds.batch(C.SEQUENCE_LENGTH)
    svg_ds = svg_ds.batch(C.BATCH_SIZE)
    return svg_ds
def _save_predictions_as_lftxt(
        output_directory, file_name,
        characterwise_predicted_label_names_per_sentence, words_per_sentence):
    """Saves the hypotheses to an .lftxt file.

    For every sentence, each predicted span is written through
    ``_save_as_linkfragment``. A sentence without any span is written once
    with the label ``"OUTSIDE"``.

    Args:
        output_directory: Directory of the output file.
        file_name: Name of the output file inside ``output_directory``.
        characterwise_predicted_label_names_per_sentence: Per sentence, one
            label name per character.
        words_per_sentence: Per sentence, the words of that sentence.
    """
    with GFile(os.path.join(output_directory, file_name), "w") as output_file:
        for (characterwise_predicted_label_names,
             words) in zip(characterwise_predicted_label_names_per_sentence,
                           words_per_sentence):
            # ``label`` is the currently open span (None when there is none);
            # ``label_start`` is the character index where it began.
            label_start = 0
            label = None
            saved_at_least_once = False
            for i, label_name in enumerate(
                    characterwise_predicted_label_names):
                if _is_label_type(label_name, LabelType.OUTSIDE):
                    # An outside label closes the open span, if any.
                    if label is not None:
                        _save_as_linkfragment(words, label_start, i - 1,
                                              label, output_file)
                        label = None
                        saved_at_least_once = True
                elif _is_label_type(label_name, LabelType.BEGINNING):
                    # A beginning label closes the open span and starts a
                    # new one at this character.
                    if label is not None:
                        _save_as_linkfragment(words, label_start, i - 1,
                                              label, output_file)
                        saved_at_least_once = True
                    label = label_name[len("B-"):]
                    label_start = i
                else:
                    # Anything else must continue the currently open span.
                    assert label_name == "I-%s" % label
            # If the label goes until the very end of the sentence.
            if label is not None:
                _save_as_linkfragment(
                    words, label_start,
                    len(characterwise_predicted_label_names) - 1, label,
                    output_file)
                saved_at_least_once = True
            # Sentences without any span still get one entry.
            if not saved_at_least_once:
                _save_as_linkfragment(words, 0, -1, "OUTSIDE", output_file)
def _generate_examples(self, annotations_path: str, pose_path: str, split: str):
    """Yield ``(id, datum)`` for every row of the split's corpus CSV.

    The CSV is ``|``-delimited and read without quoting. Video frames and
    ``fps`` are added only when the builder config includes video; ``pose``
    is added only when ``pose_path`` is not ``None``.
    """
    corpus_csv = path.join(annotations_path, "annotations", "manual",
                           "PHOENIX-2014-T." + split + ".corpus.csv")
    images_path = path.join(annotations_path, "features",
                            "fullFrame-210x260px", split)
    poses_path = None
    if pose_path is not None:
        poses_path = path.join(pose_path, split)

    with GFile(corpus_csv, "r") as corpus_file:
        rows = csv.DictReader(corpus_file, delimiter="|", quoting=csv.QUOTE_NONE)
        for row in rows:
            example_id = row["name"]
            datum = {
                "id": example_id,
                "signer": row["speaker"],
                "gloss": row["orth"],
                "text": row["translation"],
            }

            if self._builder_config.include_video:
                # The last seven characters of the joined path are dropped to
                # obtain the frames directory
                frames_base = path.join(images_path, row["video"])[:-7]
                frame_paths = []
                for name in sorted(tf.io.gfile.listdir(frames_base)):
                    if name == "createDnnTrainingLabels-profile.py.lprof":
                        continue
                    frame_paths.append(path.join(frames_base, name))
                datum["video"] = frame_paths

                configured_fps = self._builder_config.fps
                datum["fps"] = configured_fps if configured_fps is not None else 25

            if poses_path is not None:
                datum["pose"] = path.join(poses_path, example_id + ".pose")

            yield example_id, datum
def _visualise(test_name, characterwise_target_labels_per_sentence,
               characterwise_predicted_labels_per_sentence,
               characters_per_sentence, words_per_sentence, visualised_label,
               visualisation_folder):
    """Generates a .html file comparing the hypothesis/target labels.

    The file is written to
    ``<visualisation_folder>/<test_name>/<visualised_label lower-cased>.html``.
    Every character is colored by whether the target and/or the predicted
    label ends with ``visualised_label``: green if both do, red if only the
    target does, blue if only the prediction does, uncolored otherwise.

    Args:
        test_name: Name of the test; used as sub-directory and in the header.
        characterwise_target_labels_per_sentence: Per sentence, one target
            label per character.
        characterwise_predicted_labels_per_sentence: Per sentence, one
            predicted label per character.
        characters_per_sentence: Per sentence, the characters themselves.
        words_per_sentence: Per sentence, the words; used to place spaces.
        visualised_label: Label suffix that is being visualised.
        visualisation_folder: Root directory for the generated files.

    Raises:
        AssertionError: If the three character-wise sequences of a sentence
            differ in length.
    """
    _assert_same_length([
        characterwise_target_labels_per_sentence,
        characterwise_predicted_labels_per_sentence, characters_per_sentence
    ])
    number_of_sentences = len(characterwise_target_labels_per_sentence)
    directory = os.path.join(visualisation_folder, test_name)
    if not os.path.exists(directory):
        os.makedirs(directory)
    file_name = os.path.join(directory, "%s.html" % visualised_label.lower())
    with GFile(file_name, "w") as file:
        # Header with the color legend.
        file.write("%s labels in %s <br>\n" % (visualised_label, test_name))
        file.write("<font color='green'>Correct labels</font> <br>\n")
        file.write("<font color='blue'>Superfluous labels</font> <br>\n")
        file.write("<font color='red'>Missed labels</font> <br>\n")
        file.write("<br>\n")
        for i in range(number_of_sentences):
            characterwise_target_labels = (
                characterwise_target_labels_per_sentence[i])
            characterwise_predicted_labels = (
                characterwise_predicted_labels_per_sentence[i])
            characters = characters_per_sentence[i]
            words = words_per_sentence[i]
            characterwise_target_labels_length = len(
                characterwise_target_labels)
            characterwise_predicted_labels_length = len(
                characterwise_predicted_labels)
            characters_length = len(characters)
            assert (
                characterwise_target_labels_length ==
                characterwise_predicted_labels_length == characters_length
            ), ("Hypotheses/targets have different lengths: %d, %d, %d"
                " (sentence %d)") % (characterwise_target_labels_length,
                                     characterwise_predicted_labels_length,
                                     characters_length, i)
            # Position inside ``words``: index of the current word and the
            # number of its characters that were already written.
            word_index = 0
            word_position = 0
            for target_label, predicted_label, character in zip(
                    characterwise_target_labels,
                    characterwise_predicted_labels, characters):
                # Characters are written as-is (no HTML escaping).
                if target_label.endswith(
                        visualised_label) and predicted_label.endswith(
                            visualised_label):
                    file.write("<font color='green'>" + character + "</font>")
                elif target_label.endswith(visualised_label):
                    file.write("<font color='red'>" + character + "</font>")
                elif predicted_label.endswith(visualised_label):
                    file.write("<font color='blue'>" + character + "</font>")
                else:
                    file.write(character)
                word_position += 1
                # After the last character of a word, emit a space and move
                # on to the next word.
                if word_position == len(words[word_index]):
                    word_index += 1
                    word_position = 0
                    file.write(" ")
            file.write("<br>\n")
def get_callbacks():
    """Return the training callbacks enabled in the config ``C``.

    A ``TensorBoard`` callback is added when ``C.TENSORBOARD_CALLBACK`` is
    truthy and a ``ModelCheckpoint`` callback when ``C.CHECKPOINT_CALLBACK``
    is truthy; both write below ``C.LOG_DIR``.
    """
    callbacks = []
    if C.TENSORBOARD_CALLBACK:
        callbacks.append(TensorBoard(os.path.join(C.LOG_DIR, 'tensorboard')))
    if C.CHECKPOINT_CALLBACK:
        callbacks.append(
            ModelCheckpoint(os.path.join(C.LOG_DIR, 'models/epoch_{epoch}')))
    return callbacks


#%%
# Convert the config json to an object, this config file contains various settings to control the training
# (read through a context manager so the config handle is closed again)
with GFile('config/densenet.json', 'r') as config_file:
    C = json.load(config_file, object_hook=lambda d: SimpleNamespace(**d))

if C.LOG_TO_CONSOLE:
    sys.stdout = open('/dev/stdout', 'w')

train_ds, val_ds = load_ds()

dense_net = DenseNet(num_classes=100,
                     block_sizes=C.BLOCK_SIZES,
                     init_filters=C.INIT_FILTERS,
                     growth_rate=C.GROWTH_RATE,
                     low_res=True)
# NOTE(review): ``lr`` is the legacy spelling of ``learning_rate`` in Keras
# optimizers -- confirm it is still accepted by the installed version.
dense_net.compile(Adam(lr=C.INIT_LR),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
# Lookup tables filled while walking the graph definition.
name_to_op = {}
conv_index_to_op = defaultdict(list)
add_max_input_to_op = {}
resize_input_to_op = {}
concat_input_to_op = {}

# Darknet cfg / weights paths (presumably outputs of this script -- confirm)
DARKNET_CONF_PATH = './model/gen/Yolov3_q.cfg'
DARKNET_WEIGHT_PATH = './model/gen/Yolov3_q.weights'
# Serialized graph definition that is read below
GRAPH_PB_PATH = './model/deploy_model.pb'

max_layer_index = -1

print("loading graph! ....")
with GFile(GRAPH_PB_PATH, 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

for node in graph_def.node:
    node_name = node.name
    name_to_op[node_name] = node
    try:
        node_index = get_node_index(node_name)
    except Exception:
        # Only real errors are handled here; a bare ``except`` would also
        # intercept KeyboardInterrupt and SystemExit.
        print('could not get the node index for: ', node_name)
        # Same exit status as before, without relying on the ``exit``
        # helper that is only installed by the ``site`` module.
        raise SystemExit(-1)
    print('Working on node {} with index {} with op {}'.format(
        node_name, node_index, node.op))
def _handle_gfile(url, mode="rb"):
    """Open ``url`` with ``GFile`` in ``mode`` (binary read by default).

    Returns:
        The opened ``GFile`` object; closing it is left to the caller.
    """
    handle = GFile(url, mode)
    return handle