vaTfRecordFileNames = [
    os.path.join(cytoImagePath, "{0}.tfrecords".format(x)) for x in vaIdents
]
vaLabels = list(vaLabelsDf.iloc[:, 2])

#print("tf idents")
#print(trIdents)

trSamplesCount = len(trLabelsDf)
vaSamplesCount = len(vaLabelsDf)

print("{0} training samples, {1} val sample, {2} samples in total".format(
    trSamplesCount, vaSamplesCount, len(labelsDf)))


trImagesDs = tfdp.getTfRecordDataset(trTfRecordFileNames) \
    .map(tfdp.extractTilePackFromTfRecord)
trLabelsDs = tf.data.Dataset.from_tensor_slices(trLabels)
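# The image and label datasets are presumably zipped into (image, label)
# pairs further down; the usual tf.data pattern would be:
#   trDs = tf.data.Dataset.zip((trImagesDs, trLabelsDs))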


def trImageTransform(imagePack):
    # Coerce every pack to the training sequence length, augment it,
    # then shuffle the tile order within the pack.
    return tf.random.shuffle(
        tfdp.augment(
            tfdp.coerceSeqSize(imagePack, trainSequenceLength)))
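
# A minimal sketch of what tfdp.coerceSeqSize presumably does: pad or truncate
# a tile pack along its first (tile) axis to a fixed sequence length. The real
# helper lives in tfdp and may differ; the name below is illustrative only.
def _coerceSeqSizeSketch(imagePack, seqLen):
    tileCount = tf.shape(imagePack)[0]
    truncated = imagePack[:seqLen]  # drop surplus tiles from long packs
    padLen = tf.maximum(0, seqLen - tileCount)  # zero-pad short packs
    return tf.pad(truncated, [[0, padLen], [0, 0], [0, 0], [0, 0]])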


def vaImageTransform(imagePack):
    # Validation packs are only size-coerced; no augmentation and no shuffling.
    return tfdp.coerceSeqSize(imagePack, trainSequenceLength)

# Skip previews that already exist on disk.
for inFullPath, outFullPath in zip(fullTrFilenames, fullOutFilenames):
    if not os.path.exists(outFullPath):
        toProcessIn.append(inFullPath)
        toProcessOut.append(outFullPath)
fullTrFilenames = toProcessIn
fullOutFilenames = toProcessOut
toGenerateCount = len(fullTrFilenames)
print("{0} previews already exist. {1} to generate".format(
    initiallyFound - toGenerateCount, toGenerateCount))

truncateCount = min(truncateCount, len(trFilenames))


def trImageTransform(imagePack):
    # Stitch a fixed pack of 36 tiles into a single 6x6 preview mosaic.
    # The augmented variant is kept for reference but disabled:
    # return tfdp.bigImageFromTiles(tfdp.augment(tfdp.coerceSeqSize(imagePack, 36)), 6)
    return tfdp.bigImageFromTiles(tfdp.coerceSeqSize(imagePack, 36), 6)
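
# A minimal sketch of what tfdp.bigImageFromTiles presumably does: lay a pack
# of sideCount*sideCount tiles out as one mosaic image. Hypothetical
# reimplementation for illustration; the real tfdp helper may differ.
def _bigImageFromTilesSketch(imagePack, sideCount):
    shape = tf.shape(imagePack)  # [sideCount*sideCount, H, W, C]
    h, w, c = shape[1], shape[2], shape[3]
    grid = tf.reshape(imagePack, [sideCount, sideCount, h, w, c])
    grid = tf.transpose(grid, [0, 2, 1, 3, 4])  # [rows, H, cols, W, C]
    return tf.reshape(grid, [sideCount * h, sideCount * w, c])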

trImagesDs = tfdp.getTfRecordDataset(fullTrFilenames) \
    .map(tfdp.extractTilePackFromTfRecord, deterministic=True) \
    .map(trImageTransform, deterministic=True) \
    .take(truncateCount)

for i, sample in enumerate(trImagesDs.as_numpy_iterator()):
    outFile = fullOutFilenames[i]
    io.imsave(outFile, sample)
    print("{0} is ready ({1} out of {2})".format(outFile, i + 1,
                                                 truncateCount))
print("Done")
# Example 3
tfrFilenameBases = [
    fname[0:32] for fname in tfrFilenames if fname.endswith(".tfrecords")
]
tfrFullFilenames = [
    os.path.join(cytoImagePath, fname) for fname in tfrFilenames
    if fname.endswith(".tfrecords")
]
print("Found {0} tfrecords files to pridict".format(len(tfrFilenames)))


def imageTransform(imagePack):
    return tfdp.coerceSeqSize(imagePack, sequenceLength)

imagesDs = tfdp.getTfRecordDataset(tfrFullFilenames) \
    .map(tfdp.extractTilePackFromTfRecord) \
    .map(imageTransform) \
    .batch(batchSize, drop_remainder=False) \
    .prefetch(prefetchSize)

model, backbone = constructModel(sequenceLength, DORate=0.3, l2regAlpha=0.0)
print("model constructed")

backbone.trainable = False

if os.path.exists(checkpointPath):
    print("Loading pretrained weights {0}".format(checkpointPath))
    model.load_weights(checkpointPath, by_name=True)
    print("Loaded pretrained weights {0}".format(checkpointPath))
else:
    print("Pretrained weights file does not exist: {0}".format(checkpointPath))
    sys.exit(1)
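
# With the weights in place, predictions can be drawn straight from the
# batched dataset; a sketch of the usual Keras call (post-processing of the
# outputs depends on the model head):
#   predictions = model.predict(imagesDs, verbose=1)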
from tqdm import tqdm

tfRecordsDir = sys.argv[1]
outHistFile = sys.argv[2]

files = os.listdir(tfRecordsDir)
files = [x for x in files if x.endswith('.tfrecords')]
print("{0} files to analyze".format(len(files)))

fullTfRecordPaths = [os.path.join(tfRecordsDir,x) for x in files]

N = min(len(files), 50)  # analyze at most the first 50 records

ds = tfDataProcessing.getTfRecordDataset(fullTfRecordPaths) \
    .map(tfDataProcessing.extractTilePackFromTfRecord) \
    .prefetch(16) \
    .take(N)


print("Analyzing tile count frequencies")

freqDict = dict()
for sample in tqdm(ds.as_numpy_iterator(), total=N, ascii=True):
    tileCount, _, _, _ = sample.shape
    # Tally how many samples contain each tile count.
    freqDict[tileCount] = freqDict.get(tileCount, 0) + 1

freqDf = pd.DataFrame.from_dict(freqDict, orient='index', columns=["Counts"])
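
# outHistFile (parsed from argv above) presumably receives this table; a
# minimal sketch, assuming CSV output:
#   freqDf.sort_index().to_csv(outHistFile)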