Exemple #1
0
def analyze(pfile_pk, filename, agent_pk, model, db):
    """Label the copyright-like spans of one file and record them in the DB.

    The file is located with ``libfosspython.repMkPath('files', filename)``,
    at most ``READMAX`` units of it are read, and ``library.label_file`` is
    asked for the labelled spans. One row is inserted into the ``copyright``
    table per span; when no span is found a single placeholder row (NULL
    offsets/content, type ``'statement'``) is inserted instead.

    Parameters:
        pfile_pk: key of the file record; must be convertible with ``int()``.
        filename: name passed to ``libfosspython.repMkPath``; also echoed in
            error messages.
        agent_pk: integer key stored in the ``agent_fk`` column.
        model: opaque model object handed to ``library.label_file``.
        db: object providing ``access(sql)``, ``status()`` and ``errmsg()``.
            ``access`` is treated as successful when it returns 0.

    Returns:
        0 on success, including the case where individual span inserts
        failed (those are reported as nonfatal). -1 when ``pfile_pk`` is not
        a number, the file does not exist, or the placeholder insert fails.

    All diagnostics are written to ``sys.stdout``.
    """
    try:
        pfile = int(pfile_pk)
    except (ValueError, TypeError):
        # Report the offending value itself. TypeError covers non-string,
        # non-numeric input such as None.
        sys.stdout.write('ERROR: Provided pfile_pk is not a number: %r\n' % (pfile_pk,))
        return -1

    path = libfosspython.repMkPath('files', filename)
    if not os.path.exists(path):
        sys.stdout.write('ERROR: File not found. path=%s\n' % (path))
        return -1

    # try/finally (rather than ``with``) closes the handle deterministically
    # without requiring any syntax newer than the rest of this file uses.
    handle = open(path)
    try:
        text = handle.read(READMAX)
    finally:
        handle.close()

    offsets = library.label_file(text, model)
    if len(offsets) == 0:
        # Nothing was labelled: store a placeholder row for this file.
        sql = """INSERT INTO copyright (agent_fk, pfile_fk, copy_startbyte, copy_endbyte, content, hash, type)
                 VALUES (%d, %d, NULL, NULL, NULL, NULL, 'statement')""" % (agent_pk, pfile)
        result = db.access(sql)
        if result != 0:
            sys.stdout.write("ERROR: DB Access error, returned %d.\nERROR: DB STATUS: %s\nERROR: DB ERRMSG: %s\nERROR: sql=%s\nERROR: filename=%s\n" % (result, db.status(), db.errmsg(), sql, filename))
            return -1
    else:
        for offset in offsets:
            # Each entry is indexed as (start, end, type).
            start, end, kind = offset[0], offset[1], offset[2]
            content = text[start:end]
            content = content.decode('ascii', 'ignore')     # make sure that it is using ascii encoding
            tokens = library.parsetext(content)
            # NOTE(review): the statement is assembled by string interpolation
            # and relies on re.escape() to neutralise quotes inside the E'...'
            # literal; `kind` is interpolated unescaped. If `db` offers a
            # parameterised query API, prefer it -- TODO confirm.
            content = re.escape(' '.join([token[1] for token in tokens]))
            sql = """INSERT INTO copyright (agent_fk, pfile_fk, copy_startbyte, copy_endbyte, content, hash, type)
                     VALUES (%d, %d, %d, %d, E'%s', E'%s', '%s')""" % (agent_pk, pfile, start, end,
                        content, hex(abs(hash(content))),
                        kind)
            result = db.access(sql)
            if result != 0:
                # A failed span insert is reported but does not abort the file.
                sys.stdout.write("ERROR: (nonfatal) DB Access error, returned %d.\nERROR: DB STATUS: %s\nERROR: DB ERRMSG: %s\nERROR: sql=%s\nERROR: filename=%s\n" % (result, db.status(), db.errmsg(), sql, filename))

    return 0
Exemple #2
0
    try:
        model = pickle.load(open(options.model))
    except:
        print >> sys.stdout, 'You must specify a training file to create a model.\n\n'
        optparser.print_usage()
        sys.exit(1)

    if options.version:
        print "Source hash: %s" % hex(abs(hash(open(sys.argv[0]).read())))
        print 'Model hash: %s' % (model['id'])
    
    if options.analyze_from_file:
        files = [line.rstrip() for line in open(options.analyze_from_file).readlines()]
        for file in files:
            text = open(file).read(READMAX)
            results = library.label_file(text,model)
            print "%s :: " % (file)
            if len(results) == 0:
                print "No copyrights"
            for i in range(len(results)):
                print "\t[%d:%d:%s] %r" % (results[i][0], results[i][1], results[i][2], text[results[i][0]:results[i][1]])

    if options.analyze_from_command_line:
        files = args
        for file in files:
            text = open(file).read(READMAX)
            results = library.label_file(text,model)
            print "%s :: " % (file)
            if len(results) == 0:
                print "No copyrights"
            for i in range(len(results)):