Ejemplo n.º 1
0
Archivo: scan.py Proyecto: iaga84/yalse
def check_anomalies():
    """Email a warning about hashes that have no healthy record.

    For each record returned by the distinct-by-hash query, count the records
    sharing its hash that are not flagged duplicate, missing or anomaly.
    When that count is zero the record's path is reported. Nothing is sent
    if no such record is found.
    """
    from yalse_core.app import application

    with application.app_context():
        session = db.session
        suspicious_paths = [
            record.file_path
            for record in session.query(File).distinct(File.file_hash)
            if session.query(File.id).filter_by(
                file_hash=record.file_hash,
                duplicate=False,
                missing=False,
                anomaly=False,
            ).count() == 0
        ]
        if not suspicious_paths:
            return
        send_email(f"WARNING! These files are an anomaly: {suspicious_paths}")
Ejemplo n.º 2
0
def reset_library():
    """Recreate the File table and reset the search index.

    Creates the database if it does not exist yet, drops the File table,
    recreates all tables and calls ``reset_index()``.

    Returns:
        A ``(payload, status)`` tuple: ``{'code': 200, 'message': "reset"}, 200``.
    """
    import logging

    from yalse_core.app import application
    with application.app_context():
        database_uri = application.config['SQLALCHEMY_DATABASE_URI']
        if not database_exists(database_uri):
            create_database(database_uri)
        try:
            # checkfirst=True makes "table does not exist yet" a no-op rather
            # than an error, so only genuine failures reach the handler below.
            File.__table__.drop(db.engine, checkfirst=True)
        except Exception:
            # The drop stays best-effort (the reset continues), but the
            # failure is now logged with its traceback instead of being
            # silently discarded.
            logging.exception("Could not drop the File table; continuing with reset")
        db.create_all()
        reset_index()
    return {'code': 200, 'message': "reset"}, 200
Ejemplo n.º 3
0
Archivo: scan.py Proyecto: iaga84/yalse
def delete_missing_files():
    """Delete the database records of files flagged as missing.

    A record is only deleted when its path really is absent from disk. A
    record flagged missing whose file exists is left in place and logged as
    an error. All deletions are committed together, and an email listing the
    removed paths is sent when at least one record was deleted.
    """
    from yalse_core.app import application
    with application.app_context():
        missing_files = db.session.query(File).filter_by(missing=True).all()
        deleted_files = []
        for missing in missing_files:
            try:
                if Path(missing.file_path).exists():
                    # Safety net: never drop the record of a file that is
                    # actually present. The message makes the log entry
                    # self-explanatory (it used to end in an empty reason).
                    raise RuntimeError("file is flagged as missing but still exists on disk")
                db.session.delete(missing)
                deleted_files.append(missing.file_path)
            except Exception as e:
                # One bad record must not abort the whole clean-up.
                logging.error(f"Error in removing missing file: {missing.file_path}, {e}")
        db.session.commit()
        if deleted_files:
            send_email(f"Deleting MISSING records: {deleted_files}")
Ejemplo n.º 4
0
Archivo: scan.py Proyecto: iaga84/yalse
def delete_duplicate_files(dry_run):
    """Remove files flagged as duplicates, from disk and from the database.

    A duplicate is only removed when another record with the same hash exists
    that is not flagged duplicate, missing or anomaly, and when both files
    are present on disk. Any failed check or error is logged and that
    duplicate is skipped; the remaining ones are still processed.

    Args:
        dry_run: When truthy, nothing is removed from disk or from the
            database. The candidate paths are still logged and included in
            the notification email.
    """
    from yalse_core.app import application
    with application.app_context():
        duplicate_files = db.session.query(File).filter_by(duplicate=True).all()
        deleted_files = []
        for duplicate in duplicate_files:
            try:
                # Check that at least one valid file exists for this hash.
                # NOTE: `== False` is intentional; these are SQL column
                # expressions, not Python booleans.
                original_file = db.session.query(File).filter(
                    File.file_path != duplicate.file_path,
                    File.file_hash == duplicate.file_hash,
                    File.duplicate == False,
                    File.missing == False,
                    File.anomaly == False
                ).first()

                # Without a valid original the duplicate may be the only
                # remaining copy, so refuse with an explicit reason rather
                # than failing on an attribute access of None.
                if original_file is None:
                    raise RuntimeError("no valid original record found for this hash")

                # The remaining checks deliberately re-verify, on the loaded
                # objects, what the queries already filtered on: deleting a
                # file is irreversible, so every precondition is asserted
                # again and each failure names its cause.
                if not Path(original_file.file_path).exists():
                    raise RuntimeError(f"original file does not exist on disk: {original_file.file_path}")
                if not Path(duplicate.file_path).exists():
                    raise RuntimeError("duplicate file does not exist on disk")
                if not original_file.file_hash == duplicate.file_hash:
                    raise RuntimeError("original and duplicate hashes differ")
                if not original_file.file_path != duplicate.file_path:
                    raise RuntimeError("original and duplicate are the same path")
                if original_file.duplicate:
                    raise RuntimeError("original record is itself flagged as duplicate")
                if original_file.missing:
                    raise RuntimeError("original record is flagged as missing")
                if original_file.anomaly:
                    raise RuntimeError("original record is flagged as anomaly")
                if not duplicate.duplicate:
                    raise RuntimeError("record is not flagged as duplicate")
                if duplicate.missing:
                    raise RuntimeError("duplicate record is flagged as missing")
                if duplicate.anomaly:
                    raise RuntimeError("duplicate record is flagged as anomaly")

                logging.warning(f"Deleting: {duplicate.file_path}")
                if not dry_run:
                    os.remove(duplicate.file_path)
                    db.session.delete(duplicate)
                deleted_files.append(duplicate.file_path)
            except Exception as e:
                # One refused or failed deletion must not stop the others.
                logging.error(f"Error in deleting duplicate file: {duplicate.file_path}, {e}")
        db.session.commit()
        if deleted_files:
            send_email(f"Deleting {len(deleted_files)} DUPLICATE files: {deleted_files}")
Ejemplo n.º 5
0
Archivo: scan.py Proyecto: iaga84/yalse
def files_scan(dry_run):
    """Reconcile the File table with the files under DOCUMENTS_DIR.

    Steps, in order:
      1. Clear the duplicate flag on every record.
      2. Flag as missing every record whose path no longer exists.
      3. Walk DOCUMENTS_DIR, hash each file, then insert a new record or
         refresh the flags (and hash) of the existing one.
      4. Commit, then run check_anomalies, delete_missing_files,
         delete_duplicate_files and index_files.

    Args:
        dry_run: Passed through unchanged to delete_duplicate_files.
    """
    from yalse_core.app import application

    with application.app_context():
        session = db.session

        # Duplicate flags are recomputed from scratch during the walk below.
        session.query(File).update({File.duplicate: False})

        for known in session.query(File).all():
            if not Path(known.file_path).exists():
                known.missing = True

        for root, _subdirs, filenames in os.walk(DOCUMENTS_DIR):
            for filename in filenames:
                file_path = os.path.join(root, filename)
                file_hash = SHA256.hash_file(file_path)

                already_tracked = (
                    session.query(File.id).filter_by(file_path=file_path).scalar() is not None
                )
                # True when a healthy record at a different path has this hash.
                same_hash_elsewhere = session.query(File.id).filter(
                    File.file_path != file_path,
                    File.file_hash == file_hash,
                    File.duplicate == False,
                    File.missing == False,
                    File.anomaly == False
                ).count() > 0

                if already_tracked:
                    tracked = session.query(File).filter_by(file_path=file_path).first()
                    tracked.missing = False
                    tracked.duplicate = same_hash_elsewhere
                    # A changed hash at a known path is an anomaly; the
                    # stored hash is then replaced by the current one.
                    hash_changed = file_hash != tracked.file_hash
                    tracked.anomaly = hash_changed
                    if hash_changed:
                        tracked.file_hash = file_hash
                    continue

                session.add(
                    File(
                        file_hash=file_hash,
                        file_path=file_path,
                        duplicate=same_hash_elsewhere,
                    )
                )

        session.commit()

        check_anomalies()
        delete_missing_files()
        delete_duplicate_files(dry_run)

        index_files()
Ejemplo n.º 6
0
Archivo: scan.py Proyecto: iaga84/yalse
def index_files():
    """Enqueue one ``index_document`` job per File record.

    Each job receives the record's hash and path and is enqueued with a
    ``job_timeout`` of 60 on a queue backed by the Redis host named 'redis'.
    """
    from yalse_core.app import application

    with application.app_context():
        redis_connection = Redis('redis')
        job_queue = Queue(connection=redis_connection)
        records = db.session.query(File).all()
        for record in records:
            job_queue.enqueue(
                index_document,
                record.file_hash,
                record.file_path,
                job_timeout=60,
            )
Ejemplo n.º 7
0
def download_file(file_hash):
    """Send the file stored under ``file_hash`` as an attachment.

    Args:
        file_hash: Hash used to look up the File record. When several
            records share it, the first one returned by the query is used.

    Returns:
        The ``send_file`` response for the record's path, or a
        ``({'code': 404, 'message': ...}, 404)`` tuple when no record has
        this hash.
    """
    from yalse_core.app import application
    with application.app_context():
        record = db.session.query(File).filter_by(file_hash=file_hash).first()
        if record is None:
            # .first() returns None for an unknown hash. Answer with a
            # not-found payload instead of failing on record.file_path.
            return {'code': 404, 'message': "file not found"}, 404
        return send_file(record.file_path, as_attachment=True)