Example #1
def _update_sdk(path_info):
    LOG.info('Updating SDK and downloading required Android platform '
             '(about 90MB, may take some time)')

    APPROX_UPPER_BOUND_ON_ANDROID_OUTPUT = 60
    android_process = lib.PopenWithoutNewConsole(
        [
            path_info.android, "update", "sdk", "--no-ui", "--filter",
            "platform-tool,tool,android-8"
        ],
        stdout=PIPE,
        stderr=STDOUT,
    )

    with ProgressBar('Installing Android SDK Components') as bar:
        finished = []

        def kill_adb_occasionally():
            """When updating the android sdk, occasionally ADB will have a lock on
			some files causing the update to fail. Killing it here helps the update succeed.
			"""
            while not finished:
                time.sleep(5)
                try:
                    # XXX: still a time-of-check/time-of-use race here, but close enough
                    if not finished:
                        _kill_adb()
                except Exception:
                    pass

        adb_killing_thread = threading.Thread(target=kill_adb_occasionally)
        adb_killing_thread.daemon = True
        adb_killing_thread.start()

        for i, line in enumerate(iter(android_process.stdout.readline, '')):
            bar.progress(float(i) / APPROX_UPPER_BOUND_ON_ANDROID_OUTPUT)

        finished.append(True)
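The interesting part of this example is the watchdog: a daemon thread polls a shared `finished` list and keeps killing ADB until the SDK update loop completes. A generalized sketch of that pattern follows; `run_with_watchdog`, `do_work` and `cleanup` are placeholder names, not part of the project above.

import threading
import time

def run_with_watchdog(do_work, cleanup, interval=5):
    finished = []

    def watchdog():
        # Keep running the cleanup action until the main work signals completion.
        while not finished:
            time.sleep(interval)
            try:
                if not finished:
                    cleanup()
            except Exception:
                pass

    thread = threading.Thread(target=watchdog)
    thread.daemon = True
    thread.start()
    try:
        return do_work()
    finally:
        finished.append(True)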
Example #2
    tv.transforms.ToTensor(),
    tv.transforms.Normalize([0.5] * 3, [0.5] * 3)
]))
train_loader = t_data.DataLoader(dataset,
                                 batch_size=CONFIG["BATCH_SIZE"],
                                 shuffle=True)

one = t.FloatTensor([1])
mone = -1 * one
one_var = t_auto.Variable(one.cuda() if CONFIG["GPU_NUMS"] > 0 else one)
mone_var = t_auto.Variable(mone.cuda() if CONFIG["GPU_NUMS"] > 0 else mone)

fix_noise = t.FloatTensor(100, CONFIG["NOISE_DIM"]).normal_(0, 1)
fix_noise_var = t_auto.Variable(
    fix_noise.cuda() if CONFIG["GPU_NUMS"] > 0 else fix_noise)
bar = j_bar.ProgressBar(CONFIG["EPOCH"], len(train_loader),
                        "D Loss:%.3f;G Loss:%.3f")
for epoch in range(1, CONFIG["EPOCH"] + 1):
    for index, (image, label) in enumerate(train_loader):
        real = image
        real_var = t_auto.Variable(
            real.cuda() if CONFIG["GPU_NUMS"] > 0 else real)
        noise = t.randn(real_var.size(0), CONFIG["NOISE_DIM"])
        noise_var = t_auto.Variable(
            noise.cuda() if CONFIG["GPU_NUMS"] > 0 else noise)

        for parm in NetD.parameters():
            parm.data.clamp_(-CONFIG["CLAMP_NUM"], CONFIG["CLAMP_NUM"])

        NetD.zero_grad()
        D_real = NetD(real_var)
        D_real.backward(one_var)
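This example follows the original WGAN recipe: clamp the critic's weights, then call backward(one) / backward(mone) to maximize D(real) - D(fake). A minimal sketch of an equivalent critic update in current PyTorch style; the network, data and optimizer names here are placeholders, not the snippet's own.

import torch

def critic_step(net_d, net_g, real, noise, optim_d, clamp_value=0.01):
    # WGAN weight clipping keeps the critic approximately Lipschitz-bounded.
    for p in net_d.parameters():
        p.data.clamp_(-clamp_value, clamp_value)

    optim_d.zero_grad()
    fake = net_g(noise).detach()
    # Minimizing D(fake) - D(real) is the same as maximizing D(real) - D(fake).
    loss = net_d(fake).mean() - net_d(real).mean()
    loss.backward()
    optim_d.step()
    return loss.item()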
Example #3
def run(file_path):
    # Init logging and database
    init_logging()
    client, file_col, schema_col, source_data_col = init_mongodb(config)

    # Set up counters and file index
    successfully_ingested_files = 0
    file_counter = 0
    file_list = DirLister.get_file_list_recursive(file_path)

    logging.info('Processing %d files from %s' % (len(file_list), file_path))

    for file in file_list:
        file_counter += 1
        ProgressBar.update_progress(file_counter / len(file_list),
                                    ('Processing file %s' % file))

        # get the file stats
        document = {
            'stats': FileStatter.stats(file),
            'filePath': file,
            '_id': file,
            'hash': FileStatter.sha1_from_file(file)
        }

        # Load the data or skip if unable
        if file.lower().endswith('.mif'):
            try:
                data = MIFparser.to_dict(file)
            except ValueError as e:
                logging.error(e)
                # if the data loading doesn't work out, just log the error and skip the file
                continue
        elif file.lower().endswith('.mid'):
            logging.debug('Skipping .mid file.')
            continue  # .mid files are processed along with their parent .mif file
        else:
            try:
                data = CSVparser.to_dict(file)
            except ValueError as e:
                logging.error('CSV parsing error on file %s: %s' % (file, e))
                # if the data loading doesn't work out, just log the error and skip the file
                continue

        # Generate the schema and try to ingest it
        try:
            schema_data = SchemaGenerator.generate_schema(data)
        except Exception as e:
            logging.error('Schema error on file %s: %s' % (file, e))
            continue

        schema_hash = FileStatter.sha1(schema_data)
        schema = {
            '_id': schema_hash,
            'schema': schema_data,
        }

        try:
            schema_col.insert_one(schema)
        except DuplicateKeyError:
            logging.debug('Schema %s was previously processed' % schema_hash)
        except Exception as e:
            logging.error('Ingest schema error on file %s: %s' % (file, e))
            # if the schema loading doesn't work out, just log the error and skip the file
            continue

        # Store the source data
        source_data_doc_sha1 = FileStatter.sha1(data)
        source_data_doc = {'_id': source_data_doc_sha1, 'data': data}

        try:
            source_data_col.insert_one(document=source_data_doc)
        except DuplicateKeyError:
                logging.debug('Source data with sha1 %s was previously processed' %
                          source_data_doc_sha1)
        except Exception as e:
            logging.error('Ingest source data error on file %s: %s' %
                          (file, e))
            continue

        # Finalize the file document with the data reference and the schema reference
        document['data'] = source_data_doc_sha1
        document['schema'] = schema['_id']

        try:
            file_col.insert_one(document=document)
        except DuplicateKeyError:
            logging.warning('File %s was previously processed, skipping' %
                            file)
            # Skip to next file
            continue
        except Exception as e:
            logging.error('Ingest file metadata error on file %s: %s' %
                          (file, e))
            continue

        logging.debug('File %s was successfully ingested' % file)
        successfully_ingested_files += 1

    logging.info('Finished!')
    logging.info('Successfully ingested %d files of %d' %
                 (successfully_ingested_files, len(file_list)))
    client.close()
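Each schema, source-data document and file document above is deduplicated by using a SHA-1 hash as `_id` and treating `DuplicateKeyError` as "already ingested". A minimal sketch of that idempotent-insert idea with pymongo; the collection and payload here are placeholders, not the project's schema.

import hashlib
import json

from pymongo.errors import DuplicateKeyError

def insert_once(collection, payload):
    # Key the document by a hash of its content so re-ingesting the same data is a no-op.
    digest = hashlib.sha1(json.dumps(payload, sort_keys=True).encode()).hexdigest()
    try:
        collection.insert_one({'_id': digest, 'data': payload})
        return True
    except DuplicateKeyError:
        return False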
    os.mkdir("output")

train_set = j_data.DataSetFromFolderForPix2Pix(
    os.path.join("/input/facades_fixed", "train"))
test_set = j_data.DataSetFromFolderForPix2Pix(
    os.path.join("/input/facades_fixed", "test"))
train_data_loader = t.utils.data.DataLoader(dataset=train_set,
                                            batch_size=CONFIG["BATCH_SIZE"],
                                            shuffle=True)
test_data_loader = t.utils.data.DataLoader(dataset=test_set,
                                           batch_size=CONFIG["BATCH_SIZE"],
                                           shuffle=True)

test_input, test_target = next(iter(test_data_loader))

bar = j_bar.ProgressBar(CONFIG["EPOCH"], len(train_data_loader),
                        "D loss:%.3f;G loss:%.3f")
for epoch in range(1, CONFIG["EPOCH"] + 1):
    for i, (input, target) in enumerate(train_data_loader):
        x_ = t_auto.Variable(input.cuda() if CONFIG["GPU_NUM"] > 0 else input)
        y_ = t_auto.Variable(
            target.cuda() if CONFIG["GPU_NUM"] > 0 else target)

        # Train discriminator with real data
        D_real_decision = Net_D(x_, y_).squeeze()
        real_ = t_auto.Variable(
            t.ones(D_real_decision.size()).cuda(
            ) if CONFIG["GPU_NUM"] > 0 else t.ones(D_real_decision.size()))
        D_real_loss = BCE_loss(D_real_decision, real_)

        # Train discriminator with fake data
        gen_image = Net_G(x_)
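The snippet stops just after `gen_image = Net_G(x_)`; the discriminator update it is building is the usual conditional-GAN step, scoring (input, target) pairs as real and (input, generated) pairs as fake. A hedged sketch of that step with current PyTorch APIs; the names mirror the example but are assumptions here.

import torch

def pix2pix_d_step(net_d, net_g, x, y, bce_loss, optim_d):
    optim_d.zero_grad()
    real_score = net_d(x, y).squeeze()
    fake_score = net_d(x, net_g(x).detach()).squeeze()
    # Real pairs should score 1, generated pairs 0.
    loss = (bce_loss(real_score, torch.ones_like(real_score)) +
            bce_loss(fake_score, torch.zeros_like(fake_score)))
    loss.backward()
    optim_d.step()
    return loss.item()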
Example #5
''' HOCR '''
hocrFiles = [
    hocrFolder + f for f in os.listdir(hocrFolder) if f.find('.hocr') != -1
]
hocrFiles = sorted(hocrFiles)
HOCRs = {}
allGlyphs = []
print(hocrFiles)
for i, f in enumerate(hocrFiles):
    with open(f, "rb") as fp:
        pageHOCR = re.findall(r'\d+', f.split("/")[-1])[0]
        HOCRs[pageHOCR] = BeautifulSoup(fp, "lxml")
        allGlyphs += HOCRs[pageHOCR].find_all(attrs={"class": u"ocrx_cinfo"})

# Step one: sort the unsorted images that sit in the root folder
Bar1 = ProgressBar(len(listOutputFolder), 30,
                   "Sort unsorted images (Step 1/2)")
for imgUnsorted in listOutputFolder:
    Bar1.update()
    if re.findall(r'\.png', imgUnsorted):  # if this entry is an image
        # get the char name of the glyph
        glyphName = imgUnsorted[0]
        # create the dir if necessary, then move the image into it
        if glyphName == ".":  # "." is not usable as a folder name, use ".point" instead
            subprocess.call(["mkdir", "-p", outputFolder + ".point"])
            subprocess.call(
                ["mv", outputFolder + imgUnsorted, outputFolder + ".point/"])
        else:
            subprocess.call(["mkdir", "-p", outputFolder + glyphName])
            subprocess.call(
                ["mv", outputFolder + imgUnsorted, outputFolder + glyphName])
Example #6
                          betas=(0, .9))


def generate_random_sample():
    while True:
        random_indexes = numpy.random.choice(dataset.__len__(),
                                             size=CONFIG["BATCH_SIZE"],
                                             replace=False)
        batch = [dataset[i][0] for i in random_indexes]
        yield t.stack(batch, 0)


random_sample = generate_random_sample()

## Fitting model
bar = j_bar.ProgressBar(1, 5000, "D Loss%.3f;G Loss%.3f")
for i in range(1, 5000 + 1):
    for p in NetD.parameters():
        p.requires_grad = True

    for j in range(5):

        ########################
        # (1) Update D network #
        ########################

        NetD.zero_grad()

        # Sample real data
        real_images = next(random_sample)
        real_images = real_images.cuda(
Example #7
dataset = j_data.Cifar10DataSetForPytorch(
    train=True,
    transform=tv.transforms.Compose([
        tv.transforms.ToTensor(),
        # Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))
    ]))
train_loader = t.utils.data.DataLoader(dataset,
                                       batch_size=BATCH_SIZE,
                                       shuffle=True)

noise = t.randn(100, NOISE_DIM)
noise_var = t_auto.Variable(noise.cuda() if GPU_NUMS > 0 else noise)

k = 0
proBar = j_bar.ProgressBar(EPOCHS, len(train_loader),
                           "D Loss:%.3f;G Loss:%.3f")
for epoch in range(1, EPOCHS + 1):
    for index, (images, _) in enumerate(train_loader):
        mini_batch = images.shape[0]
        noise = t_auto.Variable(
            t.FloatTensor(mini_batch, NOISE_DIM, 1, 1).cuda(
            ) if GPU_NUMS > 0 else t.FloatTensor(mini_batch, NOISE_DIM, 1, 1))
        real = t_auto.Variable(
            t.FloatTensor(mini_batch, IMAGE_CHANNEL, IMAGE_SIZE, IMAGE_SIZE
                          ).cuda() if GPU_NUMS > 0 else t.
            FloatTensor(mini_batch, IMAGE_CHANNEL, IMAGE_SIZE, IMAGE_SIZE))
        label = t_auto.Variable(
            t.FloatTensor(1).cuda() if GPU_NUMS > 0 else t.FloatTensor(1))

        Net_D.zero_grad()
        real.data.resize_(images.size()).copy_(images)
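The `resize_().copy_()` idiom above reuses preallocated buffers, which was common with the old `Variable` API; with current PyTorch the per-batch tensors can simply be built directly. A small sketch, with device and dimensions as assumptions:

import torch

def make_batch(images, noise_dim, device):
    real = images.to(device)  # no resize_/copy_ buffer reuse needed
    noise = torch.randn(images.size(0), noise_dim, 1, 1, device=device)
    real_label = torch.ones(images.size(0), device=device)
    fake_label = torch.zeros(images.size(0), device=device)
    return real, noise, real_label, fake_label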
Example #8
if args.resize is not None:
    resize = args.resize
if args.tmp is not None:
    resizedFolder = args.tmp
if args.style is not None:
    fontStyles = args.style

subprocess.call(["mkdir", "-p", levelFolder])
subprocess.call(["mkdir", "-p", resizedFolder])

percentResize = str(resize*100)+"%"

for fontStyle in fontStyles:
    styleFolder = averageFolder + "/" + fontStyle
    images = [styleFolder+"/"+f for f in os.listdir(styleFolder)]
    BarGlyph = ProgressBar(len(images), 30, "Glyphs : ")
    outputFolder = levelFolder + "/" + fontStyle
    if not os.path.isdir(outputFolder):
        os.mkdir(outputFolder)

    for glyph in images:
        glyphName = glyph.split("/")[-1].split(".")[-2]  # get the name of the glyph
        # print(glyphName)

        # save rescale and modify versions
        if resize != 1:
            subprocess.call(["convert", glyph, "-resize", percentResize, resizedFolder + glyphName + ".png"]) #save a rescale version
            for blur in blurs:
                for delta in deltas:
                    for level in levels:
                        m = int(level) - delta / 2
Example #9
# overwrite data from command line arguments
if args.output is not None:
    outputFolder = args.output
if args.target is not None:
    images2analysis = args.target
if args.lang is not None:
    lang = args.lang

# beginning
print("start {}\nanalysis from: {}\nto output folder: {}\nin language: {}\n".format(
    __file__, images2analysis, outputFolder, lang))
subprocess.call(["mkdir", "-p", outputFolder])

if len(images2analysis) > 0:
    progressBar = ProgressBar(len(images2analysis), 30, "Analysis : ")
    for img in images2analysis:

        if os.path.isfile(img) and re.search(
                r"\.png|\.PNG|\.jpg|\.jpeg|\.JPG|\.JPEG|\.tif|\.TIF|\.jp2",
                img):
            outputName = img.split("/")[-1].split(".")[0:-1]
            subprocess.call([
                "tesseract",
                str(img),
                str(outputFolder) + str(outputName[0]), "-l", lang, "--dpi",
                str(resolution), "-c", "tessedit_create_hocr=1", "-c",
                "hocr_char_boxes=1"
            ])
        else:
            print(" ----> invalid file found : {}".format(img))
Example #10
if CONFIG["GPU_NUMS"] > 0:
    G = G.cuda()
    D = D.cuda()
    x = x.cuda()
    z = z.cuda()
    z_test = z_test.cuda()

x = Variable(x)
z = Variable(z)
z_test = Variable(z_test)

optimizerD = torch.optim.Adam(D.parameters(), lr=CONFIG["LEARNING_RATE"], betas=(0.5, 0.999), weight_decay=0)
optimizerG = torch.optim.Adam(G.parameters(), lr=CONFIG["LEARNING_RATE"], betas=(0.5, 0.999), weight_decay=0)

## Fitting model
bar = j_bar.ProgressBar(CONFIG["EPOCH"], len(dataset), "D loss:%.3f;G loss:%.3f")
for epoch in range(1, CONFIG["EPOCH"] + 1):

    for i, data_batch in enumerate(dataset, 0):
        ########################
        # (1) Update D network #
        ########################

        for p in D.parameters():
            p.requires_grad = True

        # Train with real data
        D.zero_grad()
        # We can ignore labels since they are all cats!
        images, labels = data_batch
        # Mostly needed for the last batch, since N might not be a multiple of batch_size
Example #11
    ifHTML = args.html
if args.style is not None:
    fontStyles = args.style

if levelValue != 0:
    images = [i for i in images if i.find(str(levelValue)) != -1]

subprocess.call(["mkdir", "-p", pnmFolder])
subprocess.call(["mkdir", "-p", vectorsFolder])

for style in fontStyles:
    outputFolder = vectorsFolder + "/" + style + "/"
    subprocess.call(["mkdir", "-p", outputFolder])
    styleFolder = configdata['levelsFolder'] + "/" + style + "/"
    images = [styleFolder + "/" + f for f in os.listdir(styleFolder)]
    Bar = ProgressBar(len(images), 30, "Vectorisation : ")
    print(images)
    for i in images:
        print(i)
        iName = i.split("/")[-1].split(".")[-2]
        subprocess.call(["convert", i, pnmFolder + iName + ".pnm"])
        subprocess.call([
            "potrace", pnmFolder + iName + ".pnm", "-s", "-o",
            outputFolder + iName + ".svg"
        ])
        clearSvg = subprocess.check_output([
            "Toolbox/venv2/bin/python2.7",
            "Toolbox/extensionInkscape/applytransform.py",
            outputFolder + iName + ".svg"
        ])
        # print(clearSvg.decode("utf-8"))
Example #12
#     imgs[pageHOCR] = Image.open(imageSources[(int(pageHOCR) - 9)])

if len(HOCRs) == len(imgs):

    for pageNumber in sorted(HOCRs):  # page by page
        hocrDocument = HOCRs[pageNumber]
        imgPage = imgs[pageNumber]
        firstPage = hocrDocument.find(attrs={"class": u"ocr_page"})
        # xml browsing

        if firstPage is not None:
            nodeGlyphs = firstPage.find_all(attrs={"class": u"ocrx_cinfo"})

            if len(nodeGlyphs) > 0:

                BarByPage = ProgressBar(len(nodeGlyphs), 30, 'Extraction page ' + pageNumber)

                print(pageNumber)
                print(type(pageNumber))

                # unicodeChars = []
                coordsCorpList = []
                # find all element matched with cssSelector to find style of char
                stylised_nodes = {}
                for selector in cssSelectors:
                    stylised_nodes[cssSelectors[selector]] = []
                for selector in cssSelectors:
                    print(selector)
                    print(cssSelectors[selector])
                    stylised_nodes[cssSelectors[selector]] += hocrDocument.select(selector)
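The two loops over `cssSelectors` (one to initialize empty lists, one to extend them) can be collapsed with a `defaultdict`; a sketch assuming `cssSelectors` maps a CSS selector to a style name and `hocrDocument` is the BeautifulSoup object above:

from collections import defaultdict

stylised_nodes = defaultdict(list)
for selector, style_name in cssSelectors.items():
    stylised_nodes[style_name] += hocrDocument.select(selector)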
Example #13
    print(styleFolder)
    if os.path.isdir(styleFolder):
        if not os.path.exists(outputFolder + "/" + fontStyle):
            os.mkdir(outputFolder + "/" + fontStyle)
        if len(specified_glyph) > 0:
            glyphsFolders = [styleFolder + "/" + specified_glyph]
            print(glyphsFolders)
            if not os.path.exists(glyphsFolders[0]):
                print("specified glyph doesn't exist")
                exit()
        else:
            glyphsFolders = [
                styleFolder + "/" + f for f in os.listdir(styleFolder)
                if os.path.isdir(styleFolder + "/" + f)
            ]
        bar = ProgressBar(len(glyphsFolders), 30, "Averaging :")
        for f in glyphsFolders:
            images = list_all_fullpath_images(f)

            glyphStr = f.split("/")[-1]
            glyphName = str2glyphName(glyphStr)

            subprocess.call(["convert"] + images + [
                "-average", outputFolder + "/" + fontStyle + "/" +
                str(glyphName) + ".png"
            ])
            print(outputFolder + "/" + fontStyle + "/" + str(glyphName) +
                  ".png")
            #bar.update()
    else:
        print("no folder found")