Ejemplo n.º 1
0
    # Argument parser
    parser = argparse.ArgumentParser(description="RCNLP - Authorship attribution with Echo State Network")

    # Argument
    parser.add_argument("--dataset", type=str, help="Dataset's directory.")
    parser.add_argument("--author1", type=str, help="First author.", default="1")
    parser.add_argument("--author2", type=str, help="Second author.", default="2")
    parser.add_argument("--training-size", type=int, help="Training size.", default=4)
    parser.add_argument("--test-size", type=int, help="Test size.", default=40)
    parser.add_argument("--samples", type=int, help="Number of samples to use to assess accuracy.", default=20)
    parser.add_argument("--lang", type=str, help="Language (ar, en, es, pt)", default='en')
    args = parser.parse_args()

    # Logging
    logging = RCNLPLogging(exp_name=ex_name, exp_inst=ex_instance,
                           exp_value=RCNLPLogging.generate_experience_name(locals()))
    logging.save_globals()
    logging.save_variables(locals())

    # >> 4. Generate W
    # First draw a 0/1 pattern (an entry is 1 with probability rc_w_sparsity),
    # then replace every 1 with a fresh random value.
    w_shape = (rc_size, rc_size)
    zero_one_probs = [1.0 - rc_w_sparsity, rc_w_sparsity]
    w = mdp.numx.random.choice([0.0, 1.0], w_shape, p=zero_one_probs)
    connected = (w == 1)
    w[connected] = mdp.numx.random.rand(len(w[connected]))

    # Inputs
    reps = dict()
    reps['pos'] = [-1]
    reps['tag'] = [-1, 20]
    reps['fw'] = [-1, 60, 40, 20]
    reps['wv'] = [-1, 60, 40, 20]
    reps['letter'] = [-1]
Ejemplo n.º 2
0
    r_data = [t_in, zip(t_in, t_out)]

    # Train
    r_flow.train(r_data)

    return r_flow

# end create_reservoir

####################################################
# Main function
####################################################
if __name__ == "__main__":

    # Logging
    logging = RCNLPLogging(exp_name=ex_name, exp_inst=ex_instance, exp_value="LR=0.9vs0.05_slen=5000_mlen=1000_size=50")
    logging.save_globals()

    # Symbols
    switching_symbols = [1]
    other_symbols = [[0]]

    # Save locals
    logging.save_variables(locals())

    # Generate the data set
    generator = RCNLPSwitchingAttractorLanguage(tag_symbol=switching_symbols, other_symbols=other_symbols,
                                                memory_length=ds_memory_length, sparsity=ds_sparsity)
    inputs, outputs = generator.generate_data_set(sample_length=ds_sample_length, n_samples=ds_data_set_size)

    # Training and test
Ejemplo n.º 3
0
                        help="PCA model to load",
                        default=None)
    parser.add_argument(
        "--in-components",
        type=int,
        help="Number of principal component to reduce inputs to.",
        default=-1)
    parser.add_argument("--sentence",
                        action='store_true',
                        help="Test sentence classification rate?",
                        default=False)
    args = parser.parse_args()

    # Logging
    logging = RCNLPLogging(exp_name=ex_name,
                           exp_inst=ex_instance,
                           exp_value=RCNLPLogging.generate_experience_name(
                               locals()))
    logging.save_globals()
    logging.save_variables(locals())

    # PCA model (optional): stays None when --pca-model is unset (None) or empty.
    pca_model = None
    if args.pca_model:
        # Pickle streams are binary, so read with 'rb'; the with-block closes
        # the file as soon as the model is loaded.
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # model files from a trusted source.
        with open(args.pca_model, 'rb') as pca_file:
            pca_model = pickle.load(pca_file)
        # end with
    # end if

    # >> 1. Choose a text to symbol converter.
    if args.converter == "pos":
        converter = RCNLPPosConverter(resize=args.in_components,
                                      pca_model=pca_model)
    elif args.converter == "tag":
Ejemplo n.º 4
0
                        default=None)
    parser.add_argument(
        "--in-components",
        type=int,
        help="Number of principal component to reduce inputs to.",
        default=-1)
    parser.add_argument("--samples", type=int, help="Samples", default=20)
    parser.add_argument("--step",
                        type=int,
                        help="Step for training size value",
                        default=5)
    args = parser.parse_args()

    # Logging
    logging = RCNLPLogging(exp_name=ex_name,
                           exp_inst=ex_instance,
                           exp_value=RCNLPLogging.generate_experience_name(
                               locals()))
    logging.save_globals()
    logging.save_variables(locals())

    # PCA model (optional): stays None when --pca-model is unset (None) or empty.
    pca_model = None
    if args.pca_model:
        # Pickle streams are binary, so read with 'rb'; the with-block closes
        # the file as soon as the model is loaded.
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # model files from a trusted source.
        with open(args.pca_model, 'rb') as pca_file:
            pca_model = pickle.load(pca_file)
        # end with
    # end if

    # >> 1. Choose a text to symbol converter.
    if args.converter == "pos":
        converter = RCNLPPosConverter(resize=args.in_components,
                                      pca_model=pca_model)
    elif args.converter == "tag":
    # Argument
    # Command-line options: data set location, the two author IDs, sampling,
    # language, input converter / PCA reduction and cross-validation settings.
    parser.add_argument("--dataset", type=str, help="Dataset's directory.")
    parser.add_argument("--author1", type=int, help="Author 1's ID.")
    parser.add_argument("--author2", type=int, help="Author 2's ID.")
    parser.add_argument("--samples", type=int, help="Number of samples to use to assess accuracy.", default=20)
    parser.add_argument("--lang", type=str, help="Language (ar, en, es, pt)", default='en')
    parser.add_argument("--converter", type=str, help="The text converter to use (fw, pos, tag, wv).", default='pos')
    parser.add_argument("--pca-model", type=str, help="PCA model to load", default=None)
    parser.add_argument("--in-components", type=int, help="Number of principal component to reduce inputs to.",
                        default=-1)
    parser.add_argument("--k", type=int, help="n-Fold Cross Validation.", default=10)
    args = parser.parse_args()

    # Logging
    logging = RCNLPLogging(exp_name=ex_name, exp_inst=ex_instance,
                           exp_value=RCNLPLogging.generate_experience_name(locals()))
    logging.save_globals()
    logging.save_variables(locals())

    # PCA model (optional): stays None when --pca-model is unset (None) or empty.
    pca_model = None
    if args.pca_model:
        # Pickle streams are binary, so read with 'rb'; the with-block closes
        # the file as soon as the model is loaded.
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # model files from a trusted source.
        with open(args.pca_model, 'rb') as pca_file:
            pca_model = pickle.load(pca_file)
        # end with
    # end if

    # >> 1. Choose a text to symbol converter.
    if args.converter == "pos":
        converter = RCNLPPosConverter(resize=args.in_components, pca_model=pca_model)
    elif args.converter == "tag":
        converter = RCNLPTagConverter(resize=args.in_components, pca_model=pca_model)
    elif args.converter == "fw":
    # Argument
    # Command-line options: data set location, the two author IDs, language,
    # input converter / PCA reduction, and the cross-validation fold count
    # (--K) together with the fold position to use (--k).
    parser.add_argument("--dataset", type=str, help="Dataset's directory.")
    parser.add_argument("--author1", type=str, help="Author 1's ID.")
    parser.add_argument("--author2", type=str, help="Author 2's ID.")
    parser.add_argument("--lang", type=str, help="Language (ar, en, es, pt)", default='en')
    parser.add_argument("--converter", type=str, help="The text converter to use (fw, pos, tag, wv).", default='pos')
    parser.add_argument("--pca-model", type=str, help="PCA model to load", default=None)
    parser.add_argument("--in-components", type=int, help="Number of principal component to reduce inputs to.",
                        default=-1)
    parser.add_argument("--K", type=int, help="n-Fold Cross Validation", default=10)
    parser.add_argument("--k", type=int, help="Fold position to use", default=0)
    args = parser.parse_args()

    # Logging
    logging = RCNLPLogging(exp_name=ex_name, exp_inst=ex_instance,
                           exp_value=RCNLPLogging.generate_experience_name(locals()))
    logging.save_globals()
    logging.save_variables(locals())

    # PCA model (optional): stays None when --pca-model is unset (None) or empty.
    pca_model = None
    if args.pca_model:
        # Pickle streams are binary, so read with 'rb'; the with-block closes
        # the file as soon as the model is loaded.
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # model files from a trusted source.
        with open(args.pca_model, 'rb') as pca_file:
            pca_model = pickle.load(pca_file)
        # end with
    # end if

    # Base converter
    base_converter = ReverseConverter()

    # Reverse WV converter
    reverse_wv_converter = WVConverter(pca_model=pca_model, upper_level=base_converter)
Ejemplo n.º 7
0
                        type=int,
                        help="Number of reservoir to generate.",
                        default=20)
    parser.add_argument("--pca-model",
                        type=str,
                        help="PCA model to load",
                        default='')
    parser.add_argument("--output",
                        type=str,
                        help="Where to save the reservoir.",
                        default="reservoir.p")
    args = parser.parse_args()

    # Logging
    logging = RCNLPLogging(exp_name=ex_name,
                           exp_inst=ex_instance,
                           exp_value=RCNLPLogging.generate_experience_name(
                               locals()))
    logging.save_globals()
    logging.save_variables(locals())

    # PCA model (optional): stays None when --pca-model is empty or unset.
    pca_model = None
    if args.pca_model:
        # Pickle streams are binary, so read with 'rb'; the with-block closes
        # the file as soon as the model is loaded.
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # model files from a trusted source.
        with open(args.pca_model, 'rb') as pca_file:
            pca_model = pickle.load(pca_file)
        # end with
    # end if

    # Results
    state_results = np.array([])
    doc_results = np.array([])

    # >> 1. Convert the text to symbolic or continuous representations
Ejemplo n.º 8
0
    # Argument
    parser.add_argument("--dataset", type=str, help="Dataset's directory.")
    parser.add_argument("--author", type=int, help="Author's ID.")
    parser.add_argument("--training-size", type=int, help="How many texts from the author to use in the training")
    parser.add_argument("--negative-samples", type=int, help="How many texts from other authors to use as negative "
                                                             "examples")
    parser.add_argument("--test-size", type=int, help="How many texts from each authors to test the model?")
    parser.add_argument("--lang", type=str, help="Language (ar, en, es, pt)", default='en')
    parser.add_argument("--converter", type=str, help="The text converter to use (fw, pos, tag, wv).", default='pos')
    parser.add_argument("--pca-model", type=str, help="PCA model to load", default=None)
    parser.add_argument("--in-components", type=int, help="Number of principal component to reduce inputs to.",
                        default=-1)
    args = parser.parse_args()

    # Logging
    logging = RCNLPLogging(exp_name=ex_name, exp_inst=ex_instance,
                           exp_value=RCNLPLogging.generate_experience_name(locals()))
    logging.save_globals()
    logging.save_variables(locals())

    # PCA model (optional): stays None when --pca-model is unset (None) or empty.
    pca_model = None
    if args.pca_model:
        # Pickle streams are binary, so read with 'rb'; the with-block closes
        # the file as soon as the model is loaded.
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # model files from a trusted source.
        with open(args.pca_model, 'rb') as pca_file:
            pca_model = pickle.load(pca_file)
        # end with
    # end if

    # >> 1. Choose a text to symbol converter.
    if args.converter == "pos":
        converter = PosConverter(resize=args.in_components, pca_model=pca_model)
    elif args.converter == "tag":
        converter = TagConverter(resize=args.in_components, pca_model=pca_model)
    elif args.converter == "fw":
Ejemplo n.º 9
0
                        help="Training size.",
                        default=4)
    parser.add_argument("--test-size", type=int, help="Test size.", default=40)
    parser.add_argument("--samples",
                        type=int,
                        help="Number of samples to use to assess accuracy.",
                        default=20)
    parser.add_argument("--lang",
                        type=str,
                        help="Language (ar, en, es, pt)",
                        default='en')
    args = parser.parse_args()

    # Logging
    logging = RCNLPLogging(exp_name=ex_name,
                           exp_inst=ex_instance,
                           exp_value=RCNLPLogging.generate_experience_name(
                               locals()))
    logging.save_globals()
    logging.save_variables(locals())

    # Generate W
    # First draw a 0/1 pattern (an entry is 1 with probability rc_w_sparsity),
    # then replace every 1 with a fresh random value.
    w_shape = (rc_size, rc_size)
    zero_one_probs = [1.0 - rc_w_sparsity, rc_w_sparsity]
    w = mdp.numx.random.choice([0.0, 1.0], w_shape, p=zero_one_probs)
    connected = (w == 1)
    w[connected] = mdp.numx.random.rand(len(w[connected]))

    # Init
    original_size_perf = np.array([])
    none_size_perf = np.array([])

    # Inputs
    reps = dict()
Ejemplo n.º 10
0
        type=int,
        help="Number of principal component to reduce inputs to.",
        default=-1)
    parser.add_argument("--model",
                        type=str,
                        help="ESN model to load",
                        default='')
    parser.add_argument("--output",
                        type=str,
                        help="Output filename where to save the model.",
                        default=None)
    args = parser.parse_args()

    # Logging
    logging = RCNLPLogging(exp_name=ex_name,
                           exp_inst=ex_instance,
                           exp_value=RCNLPLogging.generate_experience_name(
                               locals()))
    logging.save_globals()
    logging.save_variables(locals())

    if args.model == '':
        # PCA model (optional): stays None when --pca-model is unset (None) or empty.
        pca_model = None
        if args.pca_model:
            # Pickle streams are binary, so read with 'rb'; the with-block
            # closes the file as soon as the model is loaded.
            # NOTE(review): pickle.load can execute arbitrary code; only load
            # model files from a trusted source.
            with open(args.pca_model, 'rb') as pca_file:
                pca_model = pickle.load(pca_file)
            # end with
        # end if

        # >> 1. Choose a text to symbol converter.
        if args.converter == "pos":
            converter = RCNLPPosConverter(resize=args.in_components,
                                          pca_model=pca_model)
                        type=int,
                        help="n-Fold Cross Validation",
                        default=10)
    parser.add_argument("--samples",
                        type=int,
                        help="Number of reservoir to sample",
                        default=50)
    parser.add_argument("--verbose",
                        action='store_true',
                        help="Verbose mode",
                        default=False)
    args = parser.parse_args()

    # Logging
    logging = RCNLPLogging(exp_name=ex_name,
                           exp_inst=ex_instance,
                           exp_value=RCNLPLogging.generate_experience_name(
                               locals()))
    logging.save_globals()
    logging.save_variables(locals())

    # PCA model (optional): stays None when --pca-model is unset (None) or empty.
    pca_model = None
    if args.pca_model:
        # Pickle streams are binary, so read with 'rb'; the with-block closes
        # the file as soon as the model is loaded.
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # model files from a trusted source.
        with open(args.pca_model, 'rb') as pca_file:
            pca_model = pickle.load(pca_file)
        # end with
    # end if

    # Choose a text to symbol converter
    if args.converter == "pos":
        converter = PosConverter(lang=args.lang,
                                 resize=args.in_components,
                                 pca_model=pca_model)
                        type=int,
                        help="Step for reservoir size value",
                        default=50)
    parser.add_argument("--min",
                        type=int,
                        help="Minimum reservoir size value",
                        default=10)
    parser.add_argument("--max",
                        type=int,
                        help="Maximum reservoir size value",
                        default=1000)
    args = parser.parse_args()

    # Logging
    logging = RCNLPLogging(exp_name=ex_name,
                           exp_inst=ex_instance,
                           exp_value=RCNLPLogging.generate_experience_name(
                               locals()))
    logging.save_globals()
    logging.save_variables(locals())

    # PCA model (optional): stays None when --pca-model is unset (None) or empty.
    pca_model = None
    if args.pca_model:
        # Pickle streams are binary, so read with 'rb'; the with-block closes
        # the file as soon as the model is loaded.
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # model files from a trusted source.
        with open(args.pca_model, 'rb') as pca_file:
            pca_model = pickle.load(pca_file)
        # end with
    # end if

    # >> 1. Choose a text to symbol converter.
    if args.converter == "pos":
        converter = RCNLPPosConverter(resize=args.in_components,
                                      pca_model=pca_model)
    elif args.converter == "tag":
Ejemplo n.º 13
0
ds_memory_length = 140  # How long the entry has to be remembered
ds_training_length = 30  # Training set length (number of samples)
ds_test_length = ds_data_set_size - ds_training_length  # Rest of the data set, after the training samples
ds_sample_length = 3000  # Length of a sample
ds_slopping_memory = False  # Is the memory slowly fading away?
ds_sparsity = 0  # Number of samples with no switching

####################################################
# Main function
####################################################
if __name__ == "__main__":

    # Logging
    logging = RCNLPLogging(
        exp_name=ex_name,
        exp_inst=ex_instance,
        exp_value=
        "LR=0.5_size=100_slen=3000_mlen=140_IS=0.1_sparsity=0.05to1.0_2dim")
    logging.save_globals()

    # Symbols
    switching_symbol = [1, 0]
    #switch_back_symbol = [-1]
    other_symbols = [[0, 0], [0, 1]]

    # Save locals
    logging.save_variables(locals())

    # Parameter average results
    parameter_remembering_rates = []
    parameter_lucidity = []
                        type=int,
                        help="Number of states to show",
                        default=500)
    parser.add_argument("--samples",
                        type=int,
                        help="Samples to estimate performances",
                        default=20)
    parser.add_argument("--pca-model",
                        type=str,
                        help="PCA model to load",
                        default='')
    args = parser.parse_args()

    # Logging
    logging = RCNLPLogging(exp_name=ex_name,
                           exp_inst=ex_instance,
                           exp_value=RCNLPLogging.generate_experience_name(
                               locals()))
    logging.save_globals()
    logging.save_variables(locals())

    # PCA model (optional): stays None when --pca-model is empty or unset.
    pca_model = None
    if args.pca_model:
        # Pickle streams are binary, so read with 'rb'; the with-block closes
        # the file as soon as the model is loaded.
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # model files from a trusted source.
        with open(args.pca_model, 'rb') as pca_file:
            pca_model = pickle.load(pca_file)
        # end with
    # end if

    # Results to analyze
    explore_results = np.array([])
    explore_deviation = np.array([])
    explore_t_test = np.array([])
Ejemplo n.º 15
0
    parser.add_argument("--dataset", type=str, help="Dataset's directory.")
    parser.add_argument("--training-size", type=int, help="Number of texts from the author", default=1)
    parser.add_argument("--test-size", type=int, help="Number of texts to assess the model.", default=20)
    parser.add_argument("--negatives", type=int, help="Number of negative texts to use", default=1)
    parser.add_argument("--samples", type=int, help="Number of samples to use to assess accuracy.", default=20)
    parser.add_argument("--lang", type=str, help="Language (ar, en, es, pt)", default='en')
    parser.add_argument("--converter", type=str, help="The text converter to use (fw, pos, tag, wv).", default='pos')
    parser.add_argument("--pca-model", type=str, help="PCA model to load", default=None)
    parser.add_argument("--in-components", type=int, help="Number of principal component to reduce inputs to.",
                        default=-1)
    parser.add_argument("--threshold", type=float, help="Confidence threshold", default=0.5)
    parser.add_argument("--sentence", action='store_true', help="Test sentence classification rate?", default=False)
    args = parser.parse_args()

    # Logging
    logging = RCNLPLogging(exp_name=ex_name, exp_inst=ex_instance,
                           exp_value=RCNLPLogging.generate_experience_name(locals()))
    logging.save_globals()
    logging.save_variables(locals())

    # PCA model (optional): stays None when --pca-model is unset (None) or empty.
    pca_model = None
    if args.pca_model:
        # Pickle streams are binary, so read with 'rb'; the with-block closes
        # the file as soon as the model is loaded.
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # model files from a trusted source.
        with open(args.pca_model, 'rb') as pca_file:
            pca_model = pickle.load(pca_file)
        # end with
    # end if

    # >> 1. Choose a text to symbol converter.
    if args.converter == "pos":
        converter = RCNLPPosConverter(resize=args.in_components, pca_model=pca_model)
    elif args.converter == "tag":
        converter = RCNLPTagConverter(resize=args.in_components, pca_model=pca_model)
    elif args.converter == "fw":