Python ImageParserの例、image_parser.ImageParser Pythonの例

コード例 #1

0

ファイルを表示

ファイル: gen_manifest.py プロジェクト: american-art/iiif

 def __init__(self, config_file):
     """Load the configuration, create the parsers and open the blacklist.

     ``config_file`` is passed to ``get_config``; the result is stored on
     ``self.config`` and also handed to ``FileNameParser``.
     """
     settings = self.get_config(config_file)
     self.config = settings
     self.fileNameParser = FileNameParser(settings)
     self.imageParser = ImageParser()

     # The blacklist file lives in a folder next to this script.
     script_dir = os.path.dirname(os.path.realpath(__file__))
     blacklist_path = os.path.join(script_dir, 'museum-no-reference-urls',
                                   "blackList.txt")

     # Read the existing entries first, then keep the same file open in
     # append mode on ``self.fob``.
     self.blackList = set()
     self.loadFile(blacklist_path)
     self.fob = open(blacklist_path, 'a')

コード例 #2

0

ファイルを表示

def main(file_path, font_path, font_size):
    """Parse the image at ``file_path`` against a font and print the result.

    The image is read flattened and inverted (``255 - image``); every value
    below 1.5 times the inverted minimum is set to zero before the characters
    parsed from ``font_path`` / ``font_size`` are matched against it.
    """
    inverted = 255 - ndimage.imread(file_path, flatten=True)

    # Zero out everything below 1.5x the minimum of the inverted image.
    cutoff = 1.5 * (inverted.min())
    inverted[inverted < cutoff] = 0

    glyphs = CharacterParser().parse(font_path, font_size)
    parser = ImageParser(glyphs, inverted, min_correlation=0.8)

    print(parser.parse())
    parser.print_characters_stats()

コード例 #3

0

ファイルを表示

    def __init__(self, idx, worker_stats, filename_queue, color_buffer_queue,
                 *args, **kwargs):
        """Set up the worker's queues, decoders and running counters.

        Any extra positional or keyword arguments are forwarded unchanged to
        the parent class initialiser.
        """
        super().__init__(*args, **kwargs)

        # Identity of this worker and the statistics object it was given.
        self.idx, self.worker_stats = idx, worker_stats

        # Decoding / parsing helpers.
        self.parser = ImageParser(idx, worker_stats)
        self.jpeg_loader = TurboJPEG()

        # Running totals, all starting at zero.
        self.num_images = self.file_size_sum = self.pixel_sum = 0

        # Queues handed in by the caller.
        self.filename_queue = filename_queue
        self.color_buffer_queue = color_buffer_queue

        # Shared boolean flag, initially True.
        self.is_running = mp.Value(ctypes.c_bool, True)

コード例 #4

0

ファイルを表示

    def __init__(self, config_file):
        """Create the parsers and load the blacklist file.

        The configuration comes from ``get_config(config_file)``. The
        blacklist file sits in ``museum-no-reference-urls`` beside this
        script; it is read via ``loadFile`` and then reopened for appending.
        """
        settings = self.get_config(config_file)
        self.config = settings
        self.fileNameParser = FileNameParser(settings)
        self.imageParser = ImageParser()

        self.blackList = set()
        here = os.path.dirname(os.path.realpath(__file__))
        blacklist_path = os.path.join(
            os.path.join(here, 'museum-no-reference-urls'), "blackList.txt")
        self.loadFile(blacklist_path)

        # Append-mode handle stored on the instance.
        self.fob = open(blacklist_path, 'a')

コード例 #5

0

ファイルを表示

ファイル: index.py プロジェクト: naumov-web/nn-numbers-2

def check_image():
    """Handle an image-check request.

    For POST requests the ``imageBase64`` request value is expected to look
    like ``"<header>,<base64 data>"``. The data part is decoded, saved through
    ``ImageModel.save_temp`` and the saved path is passed to
    ``ImageParser.parse``, whose result is returned as JSON.

    Returns ``{'success': 0}`` as JSON when no model is loaded or when the
    payload has no comma-separated data part. Requests with any other method
    fall through without a return value.
    """
    if request.method == 'POST':
        # Identity check: ``== None`` can be overridden by a custom __eq__.
        if model is None:
            return jsonify({'success': 0})

        image_b64 = request.values['imageBase64']
        # Reject payloads without a "<header>,<data>" shape instead of
        # failing with an IndexError.
        parts = image_b64.split(',')
        if len(parts) < 2:
            return jsonify({'success': 0})
        image_bin = base64.decodebytes(parts[1].encode('utf-8'))

        image_model = ImageModel()
        temp_path = image_model.save_temp(image_bin)

        parser = ImageParser()
        result = parser.parse(temp_path)

        return jsonify(result)

コード例 #6

0

ファイルを表示

ファイル: fetch_beacons.py プロジェクト: Wyosotis/tracking_web_beacons

def get_image_size(data, url):
    """Return ``(size, img_format)`` for the given image data.

    Args:
        data: Image data fed to ``ImageParser.process_data``; ``None`` means
            there is nothing to inspect.
        url: Source of the data, used only in the error message.

    Returns:
        A ``(size, img_format)`` tuple. Either element stays ``None`` when
        ``data`` is ``None`` or when a ``ValueError`` is raised before that
        value was obtained.
    """
    size = None
    img_format = None

    try:
        if data is not None:
            parser = ImageParser()
            parser.process_data(data)
            size = parser.get_size()
            img_format = parser.get_format()
    except ValueError as err:
        message = 'ValueError {0}. Uri: {1}'.format(err, url)
        logging.info(message)
        # The colour sequence must end with "m"; without it the terminal
        # consumes the first character of the message instead of colouring it.
        print('\033[91m{0}\033[0m'.format(message))

    return (size, img_format)

コード例 #7

0

ファイルを表示

ファイル: main.py プロジェクト: jchung05/FloppyBot

def main():
    """Run the screenshot-parsing loop until interrupted with Ctrl+C.

    Configuration is read from the environment (after ``load_dotenv()``):
    ``WEBHOOK_URL``, ``ROLE_ID``, ``TESSERACT_PATH`` and the screenshot region
    ``X``, ``Y``, ``W``, ``H``.

    Each iteration takes a screenshot of the region, runs it through the
    ``ImageParser`` pipeline, enqueues every parsed entry on the bot and sends
    the message, then waits out the rest of a 30 second slot. After 30
    iterations ``bot.garbagePickup`` is called with the minute of the last
    iteration's start time.

    Raises:
        KeyError: if one of ``X``, ``Y``, ``W``, ``H`` is not set.
        ValueError: if one of them is not an integer.
    """
    load_dotenv()

    WEBHOOK = os.getenv('WEBHOOK_URL')
    ROLE = os.getenv('ROLE_ID')
    TESSERACT_PATH = os.getenv('TESSERACT_PATH')

    # region => (x,y,w,h)
    # Environment values are strings; convert them once so the screenshot
    # region is numeric.
    region = tuple(int(os.environ[name]) for name in ('X', 'Y', 'W', 'H'))

    bot = FloppyBot(WEBHOOK, ROLE)
    p = ImageParser(r'{}/tesseract.exe'.format(TESSERACT_PATH))

    try:
        while True:
            for _ in range(30):
                start_time = datetime.now()
                print('{}: Parsing...'.format(start_time.strftime("%H:%M:%S")))

                ss = pyautogui.screenshot(region=region)

                p.PILtoCV(ss)
                p.maskImage()
                p.inflateImage(2, 2)
                p.doubleSpace()
                p.thresholding()

                # Two parse passes: the second one runs after invertRGB().
                for attempt in range(2):
                    if attempt:
                        p.invertRGB()
                    p.parseScreenshot()

                for timestamp in p.parsed_mvp:
                    for k, v in timestamp.items():
                        bot.enqueue(k, v)
                p.reset()

                bot.sendMessage()

                time_delta = 30 - (datetime.now() - start_time).total_seconds()
                # An iteration that took longer than 30 s yields a negative
                # remainder, which time.sleep() rejects; clamp it to zero.
                time.sleep(max(0, time_delta))

            bot.garbagePickup(start_time.minute)
    except KeyboardInterrupt:
        print("Goodbye Floppy friend!")

コード例 #8

0

ファイルを表示

ファイル: gen_manifest.py プロジェクト: american-art/iiif

class App(object):
    """Generate IIIF presentation manifests for museum image records.

    Records come from ``downloadData.sparqlQuery()``. For every record that
    is not skipped, one JSON manifest is written under the ``manifest``
    folder next to this script. Image URLs that cannot be parsed or sized are
    appended to ``museum-no-reference-urls/blackList.txt``; URLs already
    listed there are skipped.
    """

    def __init__(self, config_file):
        """Load the configuration, create the parsers and open the blacklist.

        Args:
            config_file: Path of the JSON configuration; ``get_config``
                resolves it relative to this script's directory.
        """
        self.config = self.get_config(config_file)
        self.fileNameParser = FileNameParser(self.config)
        self.imageParser = ImageParser()
        self.blackList = set()
        blackList_URL_Folder = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'museum-no-reference-urls')
        blacklistFile = os.path.join(blackList_URL_Folder, "blackList.txt")
        self.loadFile(blacklistFile)
        # Kept open in append mode; run() flushes it after the manifests
        # have been built.
        self.fob = open(blacklistFile, 'a')

    def get_config(self, config_file):
        """Return the parsed JSON configuration.

        Args:
            config_file: Path joined onto this script's directory (an
                absolute path is therefore used as-is).

        Returns:
            The decoded JSON document.
        """
        config_file_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), config_file)
        # One read is enough: the path is never empty and both the "default"
        # and the "user" config were loaded from this same file.
        with open(config_file_path) as f:
            return json.load(f)

    def run(self):
        """Build all manifests, then flush pending blacklist entries."""
        try:
            self.build_manifest()
        finally:
            self.fob.flush()

    def build_manifest(self):
        """Write one manifest file per record and return the last manifest.

        A single manifest dict is reused: for every record its canvas list is
        cleared, ``seeAlso``, ``@id`` and ``label`` are updated, and the dict
        is dumped to ``manifest/<base>/<manifest_file_id>.json``.

        Returns:
            The manifest dict in the state it had after the last record.
        """
        config = self.config
        alphabet = (string.ascii_uppercase + string.ascii_lowercase +
                    string.digits)
        x = ''.join(random.choice(alphabet) for _ in range(16))
        manifestServerRootUrl = config['manifestServerRootUrl']
        manifestId = '%s/manifest/%s' % (manifestServerRootUrl, x)
        manifestLabel = config['manifestLabel']
        sequenceId = '%s/sequence/%s/0' % (manifestServerRootUrl, x)
        m = {
            '@context': 'http://iiif.io/api/presentation/2/context.json',
            '@type': 'sc:Manifest',
            '@id': manifestId,
            'label': manifestLabel,
            'sequences': [{
                '@type': "sc:Sequence",
                '@id': sequenceId,
                'label': 'Sequence 1',
                'viewingDirection': "left-to-right",
                'canvases': []
            }],
            'seeAlso': {
                '@id': "",
                'format': "text/rdf"
            }
        }

        if config.get('metadata'):
            m['metadata'] = config['metadata']

        res1 = downloadData.sparqlQuery()
        print(len(res1))

        script_dir = os.path.dirname(os.path.realpath(__file__))
        cachedFolder = os.path.join(script_dir, 'museum-cached-data')
        manifestFolder = os.path.join(script_dir, 'manifest')

        # The same list object is emptied in place for every record.
        canvases = m['sequences'][0]['canvases']

        for base, res in res1.items():
            canvases[:] = []
            cachedFile = os.path.join(cachedFolder, base + '.txt')
            print("Executing " + base + ".....")
            self.imageParser.loadFile(cachedFile)
            self.imageParser.openFile(cachedFile)
            for artist in res:
                canvases[:] = []
                # Missing or malformed fields fall back to "unknown".
                try:
                    uri_key = artist["x"]["value"].split("/")[-1]
                    m['seeAlso']['@id'] = artist["x"]["value"]
                except Exception:
                    m['seeAlso']['@id'] = "unknown"
                    uri_key = "unknown"
                try:
                    f_name = artist["image"]["value"]
                except Exception:
                    f_name = "unknown"

                manifest_file_id = self.fileNameParser.geturlid(f_name, base)
                manifestfilename = os.path.join(manifestFolder, base,
                                                manifest_file_id + '.json')
                m['@id'] = '%s/manifest/%s/%s.json' % (manifestServerRootUrl,
                                                       base, manifest_file_id)
                try:
                    caption = artist["caption"]["value"]
                except Exception:
                    caption = "unknown"
                if f_name in self.blackList:
                    continue
                file_info = self.fileNameParser.parse(f_name, base, caption,
                                                      uri_key)
                if not file_info:
                    self.fob.write(base + '\t' + f_name + '\n')
                    continue

                canvas = self.build_canvas(file_info, caption, base,
                                           manifest_file_id)
                if canvas:
                    canvases.append(canvas)
                else:
                    self.fob.write(base + '\t' + file_info['file_name'] + '\n')
                m['label'] = caption

                # The manifest is written even when no canvas could be built.
                with open(manifestfilename, 'w') as outfile:
                    json.dump(m, outfile)

            self.imageParser.close()

        return m

    def build_canvas(self, info, caption, museum, manifest_file_id):
        """Return the canvas dict for one image, or ``None`` on failure.

        Args:
            info: Mapping with ``file_name``, ``canvas_id``, ``image_id``,
                ``image_resource_id`` and ``image_service_id``.
            caption: Used as the canvas label.
            museum: Museum key; the image ``service`` entry is kept only when
                it contains ``"ccma"``.
            manifest_file_id: Passed through to ``imageParser.size``.

        Returns:
            The canvas dict, or ``None`` when the image size or thumbnail
            could not be obtained.
        """
        try:
            image_info = self.imageParser.size(info['file_name'], museum,
                                               manifest_file_id)
            width = int(image_info["width"])
            height = int(image_info["height"])
            canvas_width = width
            canvas_height = height
            # Canvases with a side under 1200 get doubled dimensions; the
            # image resource keeps its real size.
            if canvas_width < 1200 or canvas_height < 1200:
                canvas_width *= 2
                canvas_height *= 2
            thumbnail = self.config['manifestServerRootUrl'] + "/" + str(
                image_info["thumbnail"])
        except Exception:
            return None

        c = {
            '@type': 'sc:Canvas',
            '@id': info['canvas_id'],
            'label': caption,
            'width': canvas_width,
            'height': canvas_height,
            'images': [{
                '@type': 'oa:Annotation',
                '@id': info['image_id'],
                'motivation': 'sc:painting',
                'on': info['canvas_id'],
                'resource': {
                    '@type': 'dctypes:Image',
                    '@id': info['image_resource_id'],
                    'format': 'image/jpeg',
                    'width': width,
                    'height': height,
                    'service': {
                        '@id': info['image_service_id'],
                        "@context": "http://iiif.io/api/image/2/context.json",
                        "profile": "http://iiif.io/api/image/2/level1.json"
                    }
                }
            }],
            'thumbnail': thumbnail
        }
        if "ccma" not in museum:
            del c["images"][0]["resource"]["service"]
        return c

    def loadFile(self, fileName):
        """Add the second tab-separated field of every line to the blacklist.

        A missing file is ignored. Lines without a tab (blank or malformed)
        are skipped instead of raising ``IndexError``.
        """
        if not os.path.exists(fileName):
            return

        with open(fileName, "r") as ins:
            for line in ins:
                fields = line.strip('\n').split('\t')
                if len(fields) < 2:
                    continue
                self.blackList.add(fields[1])

コード例 #9

0

ファイルを表示

# Load the project configuration from config.json under project_path.
with open(os.path.join(project_path, 'config.json')) as config_file:
    config = json.load(config_file)

# Flask application with an upload size limit taken from the configuration
# and CORS applied to the app.
app = Flask(__name__)
app.config['MAX_CONTENT_LENGTH'] = int(
    config['max_image_upload_size_mb']) * 1024 * 1024  # MB -> bytes
CORS(app)

# Metadata tables; both CSV paths are relative to project_path and the first
# column is used as the index.
wiki_meta_df = pandas.read_csv(os.path.join(project_path,
                                            config['wiki_meta_csv_path']),
                               index_col=0)
imdb_meta_df = pandas.read_csv(os.path.join(project_path,
                                            config['imdb_meta_csv_path']),
                               index_col=0)

# The image parser is created from the configured models directory and
# initialised once at import time.
image_parser = ImageParser(config['open_face_models_dir_path'])
image_parser.init()

# Representations are loaded from the configured file and shared with the
# matcher through the store.
representation_store = RepresentationStore(image_parser)
representation_store.load_representations(
    os.path.join(project_path, config['representations_file_path']))

image_matcher = ImageMatcher(representation_store)

# Number of representations available after loading.
representation_count = len(representation_store.get_representations())


@app.route('/randomImages', methods=['GET'])
def get_random_images():
    out_paths = [
        row['full_path'] for index, row in imdb_meta_df.sample(12).iterrows()

コード例 #10

0

ファイルを表示

class ImageLoader(mp.Process):
    """Worker process that decodes image files and feeds an ``ImageParser``.

    File descriptions are taken from ``filename_queue`` while ``is_running``
    is true. Every decoded image is normalised to a 3-channel ``uint8`` array
    and passed to ``parser.add_image``. When the loop ends the parser is
    finalised and its ``col_buffer`` is put on ``color_buffer_queue``.
    """

    def __init__(self, idx, worker_stats, filename_queue, color_buffer_queue,
                 *args, **kwargs):
        """Store the queues and create the decoders and counters.

        Extra positional and keyword arguments are forwarded to
        ``mp.Process.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.idx = idx
        self.worker_stats = worker_stats
        self.parser = ImageParser(idx, worker_stats)
        self.jpeg_loader = TurboJPEG()
        self.num_images = 0
        self.file_size_sum = 0
        self.pixel_sum = 0
        self.filename_queue = filename_queue
        self.color_buffer_queue = color_buffer_queue
        self.is_running = mp.Value(ctypes.c_bool, True)

    def read_png(self, buf):
        """Decode PNG bytes with OpenCV; return ``None`` if decoding fails.

        16-bit images are always returned as ``uint8``: values above 255 are
        scaled down by 256 first, otherwise the data is only cast. Leaving
        such an image as ``uint16`` would trip the ``uint8`` assertion in
        ``load_single_image``.
        """
        x = np.frombuffer(buf, dtype=np.uint8)
        img_np = cv2.imdecode(x, cv2.IMREAD_UNCHANGED)
        if img_np is not None and img_np.dtype == np.uint16:
            if img_np.max() > 255:
                img_np = img_np // 256
            img_np = img_np.astype(np.uint8)
        return img_np

    def read_jpeg(self, buf):
        """Decode JPEG bytes with the TurboJPEG loader, silencing warnings."""
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            return self.jpeg_loader.decode(buf)

    def read_image(self, filename, buf):
        """Decode ``buf`` and return a 3-channel array, or ``None``.

        Files ending in ``.png`` go through ``read_png``; everything else is
        decoded as JPEG, where an ``OSError`` counts as "not decodable". A
        fourth channel is dropped and single-channel images are replicated
        into three channels.
        """
        if filename.endswith(".png"):
            bgr_array = self.read_png(buf)
        else:
            try:
                bgr_array = self.read_jpeg(buf)
            except OSError:
                bgr_array = None
        if bgr_array is None:
            return None

        if bgr_array.ndim > 2 and bgr_array.shape[2] == 4:
            # Slicing leaves a strided view; make the memory contiguous again.
            bgr_array = np.ascontiguousarray(bgr_array[:, :, :3])
        if bgr_array.ndim == 2:
            # Broadcast the single channel into all three output channels.
            new_array = np.zeros(bgr_array.shape + (3, ), dtype=np.uint8)
            new_array[...] = bgr_array[:, :, np.newaxis]
            bgr_array = new_array
        return bgr_array

    def print_stats(self, i, t0, t1, cl0, cl1):
        """Print throughput figures for the interval ``t0``..``t1``.

        ``cl0`` / ``cl1`` are a second pair of timestamps whose difference is
        shown in milliseconds.
        """
        # Named so the module-level ``mp`` alias is not shadowed.
        megapixels = self.pixel_sum / 1024**2
        megabytes = self.file_size_sum / 1024**2
        mp_per_second = megapixels / (t1 - t0)
        mb_per_second = megabytes / (t1 - t0)
        print(f"\r{i:4d}", end="\t", flush=True)
        print(f"{mp_per_second:8.1f}MP/s", end="\t", flush=True)
        print(f"{mb_per_second:.2f}MB/s", end="\t")
        print(f"({(cl1-cl0) * 1e3:6.1f}ms) ({megapixels:7.1f}MP)", end="")

    def load_single_image(self, filename, in_file):
        """Read one image from ``in_file`` and hand it to the parser.

        Images that cannot be decoded are skipped silently. On success the
        byte and pixel counters are updated.
        """
        buf = in_file.read()
        bgr_array = self.read_image(filename, buf)
        if bgr_array is None:
            return
        assert bgr_array.dtype == np.uint8
        self.parser.add_image(bgr_array)
        # The bytes are already in memory, so count them directly instead of
        # stat-ing the file again through a module-level directory path.
        self.file_size_sum += len(buf)
        self.pixel_sum += bgr_array.size // 3

    def run(self):
        """Process queued files until ``is_running`` is cleared."""
        self.parser.compile()
        while self.is_running.value:
            try:
                # Time out after a second so the flag is re-checked regularly.
                image_data = self.filename_queue.get(True, 1)
            except queue.Empty:
                continue
            filename = f"{image_data['filename']}.{image_data['filetype']}"
            with open(directory + filename, 'rb') as in_file:
                self.load_single_image(filename, in_file)
        self.parser.finalize_parser()
        self.color_buffer_queue.put(self.parser.col_buffer)

コード例 #11

0

ファイルを表示

ファイル: main.py プロジェクト: alojzmilicevic/mazes

from image_parser import ImageParser
from window.Frame import Frame
from maze import Maze
from breadthFirst import solve as bf

# Size value shared by the maze and the window created further down.
SIZE = 1024

# Parse the maze image and solve it at import time using the breadth-first
# solver imported as ``bf``.
file_name = "images/8x8.png"
parser = ImageParser(file_name)
data = parser.parse_image()
maze = Maze(data, SIZE)
maze.solve(solver=bf)

# Report what the parser and the maze have to say about the run.
parser.print_info()
maze.print_info()


class Window(Frame):
    """Frame subclass whose ``draw`` renders the module-level ``maze``."""

    def draw(self):
        """Draw the maze onto this frame's render target, then call ``run``."""
        maze.draw(self.get_render_target())
        self.run()


# Create the window with the shared SIZE value and initialise it.
window = Window(SIZE)
window.init()

コード例 #12

0

ファイルを表示

ファイル: gen_manifest.py プロジェクト: yale-web-technologies/gen-manifest

 def __init__(self, file_names_file, config_file):
     """Remember the input paths and create the parsers.

     The root directory is taken from the ``GEN_MANIFEST_HOME`` environment
     variable (a ``KeyError`` is raised when it is unset) and must be set
     before ``get_config`` is called.
     """
     self.root_dir = os.environ['GEN_MANIFEST_HOME']
     self.file_names_file = file_names_file

     settings = self.get_config(config_file)
     self.config = settings
     self.fileNameParser = FileNameParser(settings)
     self.imageParser = ImageParser()

コード例 #13

0

ファイルを表示

ファイル: gen_manifest.py プロジェクト: yale-web-technologies/gen-manifest

class App(object):
    """Build a shared-canvas manifest from a list of image file names.

    The manifest is assembled from the merged default/user configuration and
    from the per-file details supplied by ``FileNameParser`` and
    ``ImageParser``.
    """

    def __init__(self, file_names_file, config_file):
        """Remember the inputs, load the configuration, create the parsers.

        Args:
            file_names_file: Path of a text file with one file name per line.
            config_file: Optional path of a user configuration (JSON).

        Raises:
            KeyError: if ``GEN_MANIFEST_HOME`` is not set in the environment.
        """
        self.root_dir = os.environ['GEN_MANIFEST_HOME']
        self.file_names_file = file_names_file
        self.config = self.get_config(config_file)
        self.fileNameParser = FileNameParser(self.config)
        self.imageParser = ImageParser()

    def get_config(self, config_file):
        """Return the default config, overlaid with the user config if given.

        The default is ``config.json`` in ``self.root_dir``. When
        ``config_file`` is truthy its top-level keys override the defaults.
        """
        # Read default config
        with open(os.path.join(self.root_dir, 'config.json')) as f:
            config = json.load(f)

        if not config_file:
            return config

        # Read user config
        with open(config_file) as f:
            config.update(json.load(f))

        return config

    def run(self):
        """Read the file names, build the manifest and print it as JSON."""
        with open(self.file_names_file) as f:
            files = [line.strip() for line in f]
        manifest = self.build_manifest(files)
        # Function-call form works on both Python 2 and Python 3.
        print(json.dumps(manifest, indent=2))

    def build_manifest(self, files):
        """Return the manifest dict for ``files``.

        Files for which ``fileNameParser.parse`` returns a falsy value are
        skipped. When the ``createChapters`` setting equals ``'y'`` a new
        range is started whenever ``chapter_padded`` changes, and every
        canvas id is added to the current range.
        """
        config = self.config
        manifestServerRootUrl = config['manifestServerRootUrl']
        projectPath = config['projectPath']

        manifestId = '%s/manifest/%s' % (manifestServerRootUrl, projectPath)
        manifestLabel = config['manifestLabel']
        sequenceId = '%s/sequence/%s/0' % (manifestServerRootUrl, projectPath)

        m = {
            '@context': 'http://www.shared-canvas.org/ns/context.json',
            '@type': 'sc:Manifest',
            '@id': manifestId,
            'label': manifestLabel,
            'sequences': [{
                '@type': "sc:Sequence",
                '@id': sequenceId,
                'label': 'Sequence 1',
                'viewingDirection': "Left-to-Right",
                'canvases': []
            }],
            'structures': []
        }

        if config.get('metadata'):
            m['metadata'] = config['metadata']

        create_chapters = config.get('createChapters') == 'y'
        canvases = m['sequences'][0]['canvases']

        # Track the open range explicitly so the first file always starts
        # one, whatever its chapter value is.
        current_chapter = None
        current_range = None

        for file_name in files:
            file_info = self.fileNameParser.parse(file_name)
            if not file_info:
                continue
            canvases.append(self.build_canvas(file_info))

            if create_chapters:
                chapter = file_info['chapter_padded']
                if current_range is None or chapter != current_chapter:
                    current_range = self.create_range(file_info)
                    m['structures'].append(current_range)
                    current_chapter = chapter
                current_range['canvases'].append(file_info['canvas_id'])

        return m

    def build_canvas(self, info):
        """Return the canvas dict for one parsed file.

        ``info`` supplies ``file_name``, ``canvas_id``, ``canvas_label``,
        ``image_id``, ``image_resource_id`` and ``image_service_id``; width
        and height come from ``imageParser.size``.
        """
        width, height = self.imageParser.size(info['file_name'])

        c = {
            '@type': 'sc:Canvas',
            '@id': info['canvas_id'],
            'label': info['canvas_label'],
            'width': width,
            'height': height,
            'images': [{
                '@type': 'oa:Annotation',
                '@id': info['image_id'],
                'motivation': 'sc:painting',
                'on': info['canvas_id'],
                'resource': {
                    '@type': 'dctypes:Image',
                    '@id': info['image_resource_id'],
                    'format': 'image/jpeg',
                    'width': width,
                    'height': height,
                    'service': {
                        '@id':
                        info['image_service_id'],
                        'dcterms:conformsTo':
                        'http://library.stanford.edu/iiif/image-api/1.1/conformance.html#level1'
                    }
                }
            }]
        }
        return c

    def create_range(self, file_info):
        """Return an empty range dict for the chapter of ``file_info``."""
        config = self.config
        range_id = '%s/range/%s/ch%s' % (config['manifestServerRootUrl'],
                                         config['projectPath'],
                                         file_info['chapter_padded'])
        label = '%s %s' % (config['chapterLabel'],
                           file_info['chapter_unpadded'])
        return {
            '@id': range_id,
            '@type': 'sc:Range',
            'label': label,
            'canvases': []
        }

コード例 #14

0

ファイルを表示

ファイル: gen_manifest.py プロジェクト: yale-web-technologies/gen-manifest

class App(object):
    """Build a shared-canvas manifest from a list of image file names.

    Configuration is the default ``config.json`` from the directory named by
    ``GEN_MANIFEST_HOME``, optionally overlaid with a user configuration.
    """

    def __init__(self, file_names_file, config_file):
        """Remember the inputs, load the configuration, create the parsers.

        Args:
            file_names_file: Path of a text file with one file name per line.
            config_file: Optional path of a user configuration (JSON).

        Raises:
            KeyError: if ``GEN_MANIFEST_HOME`` is not set in the environment.
        """
        self.root_dir = os.environ['GEN_MANIFEST_HOME']
        self.file_names_file = file_names_file
        self.config = self.get_config(config_file)
        self.fileNameParser = FileNameParser(self.config)
        self.imageParser = ImageParser()

    def get_config(self, config_file):
        """Return the default config, overlaid with the user config if given."""
        # Read default config
        with open(os.path.join(self.root_dir, 'config.json')) as f:
            config = json.load(f)

        if not config_file:
            return config

        # Read user config
        with open(config_file) as f:
            config.update(json.load(f))

        return config

    def run(self):
        """Read the file names, build the manifest and print it as JSON."""
        with open(self.file_names_file) as f:
            files = [line.strip() for line in f]
        manifest = self.build_manifest(files)
        # Function-call form works on both Python 2 and Python 3.
        print(json.dumps(manifest, indent=2))

    def build_manifest(self, files):
        """Return the manifest dict for ``files``.

        Files for which ``fileNameParser.parse`` returns a falsy value are
        skipped. When the ``createChapters`` setting equals ``'y'`` a new
        range is started whenever ``chapter_padded`` changes, and every
        canvas id is added to the current range.
        """
        config = self.config
        manifestServerRootUrl = config['manifestServerRootUrl']
        projectPath = config['projectPath']

        manifestId = '%s/manifest/%s' % (manifestServerRootUrl, projectPath)
        manifestLabel = config['manifestLabel']
        sequenceId = '%s/sequence/%s/0' % (manifestServerRootUrl, projectPath)

        m = {
            '@context': 'http://www.shared-canvas.org/ns/context.json',
            '@type': 'sc:Manifest',
            '@id': manifestId,
            'label': manifestLabel,
            'sequences': [
                {
                    '@type': "sc:Sequence",
                    '@id': sequenceId,
                    'label': 'Sequence 1',
                    'viewingDirection': "Left-to-Right",
                    'canvases': []
                }
            ],
            'structures': []
        }

        if config.get('metadata'):
            m['metadata'] = config['metadata']

        create_chapters = config.get('createChapters') == 'y'

        # Track the open range explicitly so the first file always starts
        # one, whatever its chapter value is.
        current_chapter = None
        current_range = None

        for file_name in files:
            file_info = self.fileNameParser.parse(file_name)
            if not file_info:
                continue
            canvas = self.build_canvas(file_info)
            m['sequences'][0]['canvases'].append(canvas)

            if not create_chapters:
                continue
            chapter = file_info['chapter_padded']
            if current_range is None or chapter != current_chapter:
                current_range = self.create_range(file_info)
                m['structures'].append(current_range)
                current_chapter = chapter
            current_range['canvases'].append(file_info['canvas_id'])

        return m

    def build_canvas(self, info):
        """Return the canvas dict for one parsed file.

        ``info`` supplies ``file_name``, ``canvas_id``, ``canvas_label``,
        ``image_id``, ``image_resource_id`` and ``image_service_id``; width
        and height come from ``imageParser.size``.
        """
        width, height = self.imageParser.size(info['file_name'])

        c = {
            '@type': 'sc:Canvas',
            '@id': info['canvas_id'],
            'label': info['canvas_label'],
            'width': width,
            'height': height,
            'images': [
                {
                    '@type': 'oa:Annotation',
                    '@id': info['image_id'],
                    'motivation': 'sc:painting',
                    'on': info['canvas_id'],
                    'resource': {
                        '@type': 'dctypes:Image',
                        '@id': info['image_resource_id'],
                        'format': 'image/jpeg',
                        'width': width,
                        'height': height,
                        'service': {
                            '@id': info['image_service_id'],
                            'dcterms:conformsTo': 'http://library.stanford.edu/iiif/image-api/1.1/conformance.html#level1'
                        }
                    }
                }
            ]
        }
        return c

    def create_range(self, file_info):
        """Return an empty range dict for the chapter of ``file_info``."""
        config = self.config
        range_id = '%s/range/%s/ch%s' % (config['manifestServerRootUrl'], config['projectPath'], file_info['chapter_padded'])
        label = '%s %s' % (config['chapterLabel'], file_info['chapter_unpadded'])
        return {
            '@id': range_id,
            '@type': 'sc:Range',
            'label': label,
            'canvases': []
        }

コード例 #15

0

ファイルを表示

from image_parser import ImageParser
from representation_store import RepresentationStore
import pandas

# Input locations, relative to the working directory the script is run from.
path_to_open_face_models = '../openface/models'
images_meta_csv_path = '../wiki-faces-meta.csv'
images_dir_path = '../wiki_crop'

# Image metadata table read from the CSV above.
images_meta_df = pandas.read_csv(images_meta_csv_path)

# The parser is created from the models directory and initialised once.
image_parser = ImageParser(path_to_open_face_models)
image_parser.init()

# Calculate representations from the metadata table and the images directory,
# then save them to 'wiki-reps-new.npy'.
representation_store = RepresentationStore(image_parser)
reps = representation_store.calculate_representations(images_meta_df,
                                                      images_dir_path)
representation_store.save_representations(reps, 'wiki-reps-new.npy')

コード例 #16

0

ファイルを表示

ファイル: gen_manifest.py プロジェクト: yale-web-technologies/gen-manifest

 def __init__(self, file_names_file, config_file):
     """Store the input paths, load the configuration, build the parsers.

     ``GEN_MANIFEST_HOME`` must be present in the environment; it becomes
     ``self.root_dir`` and is assigned before ``get_config`` runs.
     """
     self.root_dir, self.file_names_file = (
         os.environ['GEN_MANIFEST_HOME'], file_names_file)

     loaded = self.get_config(config_file)
     self.config = loaded
     self.fileNameParser = FileNameParser(loaded)
     self.imageParser = ImageParser()

コード例 #17

0

ファイルを表示

class App(object):
    """Generate one IIIF presentation manifest per museum.

    Records come from ``downloadData.sparqlQuery()``, grouped by a museum
    key. Each group's canvases are collected into a shared manifest dict that
    is written to ``museum-manifests/<key>.json`` next to this script. Image
    URLs that cannot be parsed or sized are appended to
    ``museum-no-reference-urls/blackList.txt``; URLs already listed there are
    skipped.
    """

    def __init__(self, config_file):
        """Load the configuration, create the parsers and open the blacklist.

        Args:
            config_file: Path of the JSON configuration; ``get_config``
                resolves it relative to this script's directory.
        """
        self.config = self.get_config(config_file)
        self.fileNameParser = FileNameParser(self.config)
        self.imageParser = ImageParser()

        self.blackList = set()
        blackList_URL_Folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'museum-no-reference-urls')
        blacklistFile = os.path.join(blackList_URL_Folder, "blackList.txt")
        self.loadFile(blacklistFile)

        # Kept open in append mode; run() flushes it after the manifests
        # have been built.
        self.fob = open(blacklistFile, 'a')

    def get_config(self, config_file):
        """Return the parsed JSON configuration.

        ``config_file`` is joined onto this script's directory, so an
        absolute path is used as-is.
        """
        config_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), config_file)
        # One read is enough: the path is never empty and both the "default"
        # and the "user" config were loaded from this same file.
        with open(config_file_path) as f:
            return json.load(f)

    def run(self):
        """Build all manifests, then flush pending blacklist entries."""
        files = []
        try:
            self.build_manifest(files)
        finally:
            self.fob.flush()

    def build_manifest(self, files):
        """Write one manifest per museum and return the last manifest.

        Args:
            files: Accepted for interface compatibility; not used.

        Returns:
            The manifest dict in the state it had after the last museum.
        """
        config = self.config
        alphabet = string.ascii_uppercase + string.ascii_lowercase + string.digits
        x = ''.join(random.choice(alphabet) for _ in range(16))
        # Angle brackets around the configured URL are stripped.
        manifestServerRootUrl = config['manifestServerRootUrl'].replace("<", "").replace(">", "")

        manifestId = '%s/manifest/%s' % (manifestServerRootUrl, x)
        manifestLabel = config['manifestLabel']
        sequenceId = '%s/sequence/%s/0' % (manifestServerRootUrl, x)

        m = {
            '@context': 'http://iiif.io/api/presentation/2/context.json',
            '@type': 'sc:Manifest',
            '@id': manifestId,
            'label': manifestLabel,
            'sequences': [
                {
                    '@type': "sc:Sequence",
                    '@id': sequenceId,
                    'label': 'Sequence 1',
                    'viewingDirection': "left-to-right",
                    'canvases': []
                }
            ],
            'structures': []
        }

        if config.get('metadata'):
            m['metadata'] = config['metadata']

        res1 = downloadData.sparqlQuery()
        print(len(res1))

        script_dir = os.path.dirname(os.path.realpath(__file__))
        cachedFolder = os.path.join(script_dir, 'museum-cached-data')
        manifestFolder = os.path.join(script_dir, 'museum-manifests')

        # The same list object is emptied in place for every museum.
        canvases = m['sequences'][0]['canvases']

        for base, res in res1.items():
            canvases[:] = []
            cachedFile = os.path.join(cachedFolder, base + '.txt')
            print("Executing " + base + ".....")
            self.imageParser.loadFile(cachedFile)
            self.imageParser.openFile(cachedFile)
            for artist in res:
                # Missing or malformed fields fall back to "unknown".
                try:
                    uri_key = artist["x"]["value"].split("/")[-1]
                except Exception:
                    uri_key = "unknown"
                try:
                    f_name = artist["image"]["value"]
                    if 'ccma' in base:
                        f_name = f_name.replace("512", "512,")
                except Exception:
                    f_name = "unknown"
                try:
                    caption = artist["caption"]["value"]
                except Exception:
                    caption = "unknown"
                if f_name in self.blackList:
                    continue
                file_info = self.fileNameParser.parse(f_name, base, caption, uri_key)
                if not file_info:
                    self.fob.write(base + '\t' + f_name + '\n')
                    continue

                canvas = self.build_canvas(file_info, caption)
                if canvas:
                    canvases.append(canvas)
                else:
                    self.fob.write(base + '\t' + file_info['file_name'] + '\n')
            self.imageParser.close()
            manifestFile = os.path.join(manifestFolder, base + '.json')
            with open(manifestFile, 'w') as outfile:
                json.dump(m, outfile)
        return m

    def build_canvas(self, info, caption):
        """Return the canvas dict for one image, or ``None`` on failure.

        Args:
            info: Mapping with ``file_name``, ``canvas_id``, ``image_id``,
                ``image_resource_id``, ``image_service_id`` and
                ``thumbnail_url``.
            caption: Used as the canvas label and as its ``caption`` metadata.

        Returns:
            The canvas dict, or ``None`` when the image size could not be
            obtained from ``imageParser.size``.
        """
        try:
            image_info = self.imageParser.size(info['file_name'])
            width = int(image_info["width"])
            height = int(image_info["height"])
        except Exception:
            return None

        c = {
            '@type': 'sc:Canvas',
            '@id': info['canvas_id'],
            'label': caption,
            'width': width,
            'height': height,
            'metadata': [
                {
                    'label': 'caption',
                    'value': caption
                }
            ],
            'images': [
                {
                    '@type': 'oa:Annotation',
                    '@id': info['image_id'],
                    'motivation': 'sc:painting',
                    'on': info['canvas_id'],
                    'resource': {
                        '@type': 'dctypes:Image',
                        '@id': info['image_resource_id'],
                        'format': 'image/jpeg',
                        'width': width,
                        'height': height,
                        'service': {
                            '@id': info['image_service_id'],
                        }
                    }
                }
            ],
            'thumbnail': info['thumbnail_url']
        }
        return c

    def loadFile(self, fileName):
        """Add the second tab-separated field of every line to the blacklist.

        A missing file is ignored. Lines without a tab (blank or malformed)
        are skipped instead of raising ``IndexError``.
        """
        if not os.path.exists(fileName):
            return

        with open(fileName, "r") as ins:
            for line in ins:
                fields = line.strip('\n').split('\t')
                if len(fields) < 2:
                    continue
                self.blackList.add(fields[1])