def officeConverter(item):
    """Create a PDF preservation copy of an office document with LibreOffice.

    The PDF is written next to the source file as
    ``<file path>.presform.pdf``. An existing preservation copy is never
    overwritten.

    LibreOffice is pointed at a scratch directory that is private to this
    call, so concurrent conversions cannot pick up or delete each other's
    output, and the directory is removed even if the conversion raises.

    Relies on the module-level names ``timeout`` and ``logger`` being set
    before the call (they are not defined in this function).

    :param item: object exposing ``get_file_path()`` for the file to convert
    :returns: ``get_data()`` of the LibreOffice command, or ``(None, None)``
        when the preservation copy already exists
    """
    import shutil
    import tempfile
    from os.path import join

    sourcePath = item.get_file_path()
    presformPath = sourcePath + '.presform.pdf'
    if exists(presformPath):
        logger.info("Office (PDF) preservation format for file exists. " +
                    "Not Clobbering.")
        return (None, None)

    # soffice names its output after the input's base name minus extension.
    fileName, _ = splitext(sourcePath)
    scratchDir = tempfile.mkdtemp(prefix='officeConv')
    try:
        officeConvertArgs = ['/Applications/LibreOffice.app/Contents/MacOS/soffice',
                             '--headless', '--convert-to', 'pdf',
                             '--outdir', scratchDir,
                             sourcePath]
        officeConvertCommand = BashCommand(officeConvertArgs)
        officeConvertCommand.set_timeout(timeout)
        officeConvertCommand.run_command()

        convertedPath = join(scratchDir, basename(fileName) + '.pdf')
        if exists(convertedPath):
            try:
                shutil.copy(convertedPath, presformPath)
            except OSError as error:
                # Stay best-effort: a failed copy is reported, not raised.
                logger.error("Could not store PDF preservation copy for " +
                             sourcePath + ": " + str(error))
        else:
            logger.warning("LibreOffice produced no PDF for: " + sourcePath)
    finally:
        shutil.rmtree(scratchDir, ignore_errors=True)

    logger.debug(officeConvertCommand.get_data())
    return officeConvertCommand.get_data()
def imageConverter(item):
    """Create a TIFF preservation copy of an image with ffmpeg.

    The copy is written next to the source file as
    ``<file path>.presform.tif``. An existing preservation copy is never
    overwritten: the function returns early, and ffmpeg is additionally
    invoked with ``-n`` so it refuses to overwrite an existing output.

    Relies on the module-level names ``timeout`` and ``logger`` being set
    before the call (they are not defined in this function).

    :param item: object exposing ``get_file_path()`` for the file to convert
    :returns: ``get_data()`` of the ffmpeg command, or ``(None, None)`` when
        the preservation copy already exists
    """
    if not exists(item.get_file_path()+'.presform.tif'):
        imageConvertArgs = ['ffmpeg', '-n', '-i', item.get_file_path(),
                            item.get_file_path()+'.presform.tif']
        imageConvertCommand = BashCommand(imageConvertArgs)
        imageConvertCommand.set_timeout(timeout)
        imageConvertCommand.run_command()
        logger.debug(imageConvertCommand.get_data())
        return imageConvertCommand.get_data()
    else:
        # Message spelling fixed ("preservation") so it matches the other
        # converters and shows up in log searches.
        logger.info("Image (tif) preservation format for file exists. " +
                    "Not Clobbering.")
        return (None, None)
def audioConverter(item):
    """Create a WAV preservation copy of an audio file with ffmpeg.

    The copy is written next to the source file as
    ``<file path>.presform.wav``; if that file already exists nothing is run.

    Relies on the module-level names ``timeout`` and ``logger`` being set
    before the call (they are not defined in this function).

    :param item: object exposing ``get_file_path()`` for the file to convert
    :returns: ``get_data()`` of the ffmpeg command, or ``(None, None)`` when
        the preservation copy already exists
    """
    sourcePath = item.get_file_path()
    wavPath = sourcePath + '.presform.wav'

    # Guard clause: never clobber an existing preservation copy.
    if exists(wavPath):
        logger.info("Audio (wav) preservation format for file exists. " +
                    "Not Clobbering.")
        return (None, None)

    ffmpeg = BashCommand(['ffmpeg', '-n', '-i', sourcePath, wavPath])
    ffmpeg.set_timeout(timeout)
    ffmpeg.run_command()
    outcome = ffmpeg.get_data()
    logger.debug(outcome)
    return outcome
def htmlConverter(item):
    """Render an HTML file to an intermediary PDF with wkhtmltopdf.

    The rendered file is written as ``<file path>.intermediary.pdf`` and a
    new ``Item`` for it is appended to the module-level ``itemStack``
    (presumably so it is converted further by the caller's loop; verify
    against the code that drains ``itemStack``). Nothing is run when
    ``<file path>.presform.pdf`` already exists.

    Relies on the module-level names ``timeout``, ``root`` and ``itemStack``
    being set before the call (they are not defined in this function).

    :param item: object exposing ``get_file_path()`` for the file to convert
    :returns: ``get_data()`` of the wkhtmltopdf command, or ``(None, None)``
        when the preservation copy already exists
    """
    sourcePath = item.get_file_path()
    if exists(sourcePath + '.presform.pdf'):
        return (None, None)

    pdfPath = sourcePath + '.intermediary.pdf'
    renderer = BashCommand(['wkhtmltopdf', sourcePath, pdfPath])
    renderer.set_timeout(timeout)
    renderer.run_command()

    # Queue the intermediary PDF as a new item.
    itemStack.append(Item(pdfPath, root))
    return renderer.get_data()
def videoConverter(item):
    """Create an AVI preservation copy of a video file with ffmpeg.

    The copy is written next to the source file as
    ``<file path>.presform.avi`` using the rawvideo video codec, the
    pcm_u24le audio codec, the uyvy422 pixel format and the ``2vuy`` video
    tag. If the preservation copy already exists nothing is run.

    Relies on the module-level names ``timeout`` and ``logger`` being set
    before the call (they are not defined in this function).

    :param item: object exposing ``get_file_path()`` for the file to convert
    :returns: ``get_data()`` of the ffmpeg command, or ``(None, None)`` when
        the preservation copy already exists
    """
    sourcePath = item.get_file_path()
    aviPath = sourcePath + ".presform.avi"

    # Guard clause: never clobber an existing preservation copy.
    if exists(aviPath):
        logger.info("Video (avi) preservation format for file exists. " +
                    "Not Clobbering.")
        return (None, None)

    inputOptions = ['ffmpeg', '-n', '-i', sourcePath]
    encodingOptions = ['-vcodec', 'rawvideo', '-acodec', 'pcm_u24le',
                       '-pix_fmt', 'uyvy422', '-vtag', '2vuy']
    ffmpeg = BashCommand(inputOptions + encodingOptions + [aviPath])
    ffmpeg.set_timeout(timeout)
    ffmpeg.run_command()
    outcome = ffmpeg.get_data()
    logger.debug(outcome)
    return outcome
def zipConverter(item):
    """Extract an archive with 7z and queue its contents for processing.

    The archive is extracted into ``<file path>.presform.extracted``. If that
    directory exists after the 7z run, every item a ``Batch`` finds in it is
    appended to the module-level ``itemStack``. Nothing is run when the
    extraction directory already exists.

    Relies on the module-level names ``timeout``, ``root``, ``itemStack`` and
    ``logger`` being set before the call (they are not defined in this
    function).

    :param item: object exposing ``get_file_path()`` for the archive
    :returns: ``get_data()`` of the 7z command, or ``(None, None)`` when the
        archive was already extracted
    """
    archivePath = item.get_file_path()
    extractDir = archivePath + '.presform.extracted'

    if exists(extractDir):
        logger.info("Already extracted.")
        return (None, None)

    extractor = BashCommand(['7z', 'x', '-o' + extractDir, archivePath])
    extractor.set_timeout(timeout)
    extractor.run_command()

    # Only queue members when 7z actually produced the output directory.
    if exists(extractDir):
        extracted = Batch(root, extractDir)
        for member in extracted.find_items(from_directory=True):
            itemStack.append(member)
    return extractor.get_data()
def gifConverter(item):
    """Split a GIF into numbered TIFF frames with ffmpeg.

    A directory ``<file path>.presform`` is created and ffmpeg writes the
    frames into it as ``output%04d.presform.tif``. Nothing is run when that
    directory (or a file of that name) already exists.

    Relies on the module-level names ``timeout`` and ``logger`` being set
    before the call (they are not defined in this function).

    :param item: object exposing ``get_file_path()`` for the GIF to convert
    :returns: ``get_data()`` of the ffmpeg command, or ``(None, None)`` when
        the output directory already exists
    """
    gifPath = item.get_file_path()
    framesDir = gifPath + '.presform'

    # Guard clause: never clobber an existing set of frames.
    if exists(framesDir):
        logger.info("Image (tif) preservation format for file exists. " +
                    "Not Clobbering.")
        return (None, None)

    # The directory creation runs without a timeout, as before.
    BashCommand(['mkdir', framesDir]).run_command()

    framePattern = framesDir + '/output%04d.presform.tif'
    splitter = BashCommand(['ffmpeg', '-n', '-i', gifPath, framePattern])
    splitter.set_timeout(timeout)
    splitter.run_command()
    outcome = splitter.get_data()
    logger.debug(outcome)
    return outcome
def _build_parser():
    """Build the command-line parser for the technical metadata generator."""
    parser = ArgumentParser(
        description="This module is meant to take a batch of files "
                    "(probably an accession in place) and generate the "
                    "technical metadata for it.",
        epilog="Copyright University of Chicago; " +
               "written by " + __author__ +
               " " + __email__)
    parser.add_argument("-v", help="See the version of this program",
                        action="version", version=__version__)
    # -b sets the level to INFO so warnings, errors and generic informative
    # statements are logged. The historical single-dash spelling "-verbose"
    # is kept and "--verbose" is accepted as well.
    parser.add_argument('-b', '-verbose', '--verbose',
                        help="set verbosity for logging to stdout",
                        action='store_const', dest='log_level',
                        const=INFO, default=INFO)
    # -d sets the level to DEBUG so debugging statements are logged too.
    parser.add_argument('-d', '--debugging', help="set debugging logging",
                        action='store_const', dest='log_level',
                        const=DEBUG, default=INFO)
    # Optionally save the log to a file as well.
    parser.add_argument('-l', '--log_loc', help="save logging to a file",
                        dest="log_loc")
    parser.add_argument('-t', '--timeout',
                        help="set a timeout in seconds for any single bash command",
                        dest='timeout', default=3600, type=int)
    parser.add_argument("item",
                        help="Enter a noid for an accession or a " +
                             "directory path that you need to validate against" +
                             " a type of controlled collection")
    parser.add_argument("root", help="Enter the root of the directory path",
                        action="store")
    return parser


def _require(condition, message):
    """Raise AssertionError(message) unless condition is truthy.

    Used instead of ``assert`` so the check (and any command run to produce
    ``condition``) is not stripped when Python runs with ``-O``.
    """
    if not condition:
        raise AssertionError(message)


def _generate_stif(item, timeout):
    """Write ``<file path>.stif.txt`` as a fallback technical metadata record.

    The file holds the output of ``stat``, ``file -i`` and ``file`` for the
    item followed by its md5 and sha256 hashes.
    """
    path = item.get_file_path()
    stifPath = path + '.stif.txt'

    commands = []
    for argv in (['stat', path], ['file', '-i', path], ['file', path]):
        command = BashCommand(argv)
        command.set_timeout(timeout)
        commands.append(command)
    for command in commands:
        _require(command.run_command()[0],
                 "STIF helper command failed for: " + path)

    md5hash = item.find_md5_hash()
    shahash = item.find_sha256_hash()

    # Build the whole record before opening the file so a failure here does
    # not leave an empty .stif.txt behind.
    sections = [command.get_data()[1].stdout.decode(encoding='UTF-8')
                for command in commands]
    sections.append("md5: " + md5hash + '\n')
    sections.append("sha256: " + shahash)
    with open(stifPath, 'w') as f:
        f.write("".join(sections))

    _require(exists(stifPath), "STIF file was not written for: " + path)
    logger.info("STIF generated for: " + path)


def main():
    """Generate technical metadata for every file of a batch.

    Parses the command line, configures the module-level ``logger`` and then,
    for each item found under the given path that has no technical metadata
    yet, runs FITS to produce ``<file path>.fits.xml``. If FITS times out, a
    ``<file path>.stif.txt`` record is written instead.

    :returns: 0 on success, 131 when interrupted from the keyboard
    :raises AssertionError: when an expected metadata file is missing or a
        STIF helper command fails
    """
    global logger

    args = _build_parser().parse_args()

    log_format = Formatter("[%(levelname)s] %(asctime)s  " +
                           "= %(message)s",
                           datefmt="%Y-%m-%dT%H:%M:%S")
    logger = getLogger("lib.uchicago.repository.logger")
    logger.setLevel(DEBUG)
    ch = StreamHandler()
    ch.setFormatter(log_format)
    ch.setLevel(args.log_level)
    logger.addHandler(ch)
    if args.log_loc:
        fh = FileHandler(args.log_loc)
        fh.setFormatter(log_format)
        logger.addHandler(fh)

    try:
        fitscommand = "fits"

        b = Batch(abspath(args.root), abspath(args.item))
        for item in b.find_items(from_directory=True):
            # Skip the metadata files this program itself produces.
            fileName = item.find_file_name()
            if ".fits.xml" in fileName or ".stif.txt" in fileName:
                continue

            item.find_technical_metadata()
            path = item.get_file_path()
            if item.has_technical_md:
                logger.info(path + " already has technical metadata. Continuing.")
                continue

            logger.info("Attempting technical metadata generation for: " + path)
            fitsCommand = BashCommand([fitscommand, '-i', path,
                                       '-o', path + '.fits.xml'])
            fitsCommand.set_timeout(args.timeout)
            try:
                logger.info("Attempting FITS generation for: " + path)
                result = fitsCommand.run_command()
                # run_command hands back an exception instead of raising it;
                # re-raise so a timeout reaches the handler below.
                if isinstance(result[1], Exception):
                    raise result[1]
                _require(exists(path + '.fits.xml'),
                         "FITS output was not written for: " + path)
                logger.info("FITS generated for: " + path)
            except TimeoutExpired:
                logger.warning("FITS generation timed out")
                logger.info("Attempting STIF generation")
                _generate_stif(item, args.timeout)

            item.find_technical_metadata()
            _require(item.has_technical_md,
                     "No technical metadata found after generation for: " + path)
            logger.info("Technical metadata generation complete for: " + path)
        return 0
    except KeyboardInterrupt:
        logger.error("Program aborted manually")
        return 131