예제 #1
0
    argp.add_argument("-s" , "--sample_interval_mode" , choices=[sampleIntervalMode.LINE_MODE , sampleIntervalMode.CRLF_MODE] , 
                      default=sampleIntervalMode.LINE_MODE ,
                      help="The mode with describes what is the inverval symbol between samples , default is LINE_MODE")
    argp.add_argument("-i" , "--input" , type=str , default="stdin" , 
                      help="'sysin' for using standard input ; else file path is needed.")
    args = argp.parse_args()

    logging.info("loadding segmentor")
    segmentor = Segmentor()
    segmentor.load(CWS_MODEL_PATH)
    logging.info("done")

    # loading model
    if args.classname == classname.JUNK :
        model = TFIDFModel()
        model.load_model(JUNK_MODEL_PATH)
    else :
        model = BOOLModel()
        model.load_model(SENSITIVE_MODEL_PATH)
    
    #process the input file
    if args.input == "stdin" :
        ifo = sys.stdin
    else :
        ifo = open(args.input) # if error , just quit
    logging.info("reading samples from %s" %ifo.name)
    
    #print sample interval mode
    logging.info("set sample interval mode as '%s'" %args.sample_interval_mode)
    
    preprocessor = PreProcessor(segmentor , args.sample_interval_mode)