import os
import sys

# DatasetCreator, FeatureSelector, Autotagger and _split_metadata_and_features
# are assumed to be defined or imported elsewhere in this script.


def training_and_classification_with_kfold_cross_validation(collection_name, k):
    '''
    Train and evaluate an autotagger with k-fold cross-validation. For each
    fold: build a gaia dataset from the training split, run feature selection
    on it, then train the autotagger and classify the fold's test split,
    producing both binary and ranked (affinity) outputs.
    '''
    # Split the collection's metadata and features into k train/test folds
    # (helper defined elsewhere in this script)
    _split_metadata_and_features(collection_name, k)
    for i in range(1, k + 1):
        # Create a gaia dataset with the training set
        print "----------------------- DATASET CREATION (FOLD %d)-----------------------" % i
        training_features='train/%s_features__fold%d.tsv' % (collection_name, i)
        chunk_size=5000
        dataset_suffix="fold%d" % i
        replace_dataset=True
        dataset_creator = DatasetCreator(collection_name)
        dataset_creator.create(training_features, chunk_size, dataset_suffix, replace_dataset)
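        # DatasetCreator is expected to write this fold's dataset to
        # dbs/<collection_name>__fold<i>.db, the path loaded by the
        # feature-selection step below (inferred from that code).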
            
        # Feature selection over the gaia dataset
        print "----------------------- FEATURE SELECTION (FOLD %d)-----------------------" % i
        dataset='dbs/%s__fold%d.db' % (collection_name, i)
        pca_covered_variance=75
        include_highlevel=True
        feature_selector = FeatureSelector()
        feature_selector.select(dataset, pca_covered_variance, include_highlevel)
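        # select() presumably applies PCA, keeping enough components to cover
        # 75% of the variance, and writes the result under transformed_dbs/
        # (inferred from the path used in the autotagging step below).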
        
        # Autotag a given test set
        print "----------------------- AUTOTAGGING (FOLD %d)-----------------------" % i
        dataset='transformed_dbs/%s__fold%d.db' % (collection_name, i)
        training_metadata='train/%s_metadata__fold%d.tsv' % (collection_name, i)
        test_features='test/%s_features__fold%d.tsv' % (collection_name, i)
        output_binary='test/%s_output_binary__fold%d.tsv' % (collection_name, i)
        output_affinity='test/%s_output_affinity__fold%d.tsv' % (collection_name, i)
        metric='LC'
        num_sim=18
        threshold=0.2
        autotagger = Autotagger()
        autotagger.train(dataset, training_metadata)
        autotagger.classify(test_features, output_binary, metric, num_sim, threshold, ranked=False)
        autotagger.classify(test_features, output_affinity, metric, num_sim, threshold, ranked=True)
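
# Example (hypothetical collection name): run 5-fold cross-validation,
# assuming the train/, test/, dbs/ and transformed_dbs/ directories exist
# relative to the working directory:
#
#   training_and_classification_with_kfold_cross_validation('my_collection', 5)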
    
    
# Command-line entry point: `args` is assumed to be the result of an argparse
# parse_args() call earlier in the script (the parser is not shown in this
# excerpt). Unspecified paths default to locations derived from the
# collection name.
if args.dataset is None:
    args.dataset = "transformed_dbs/" + args.collection_name + ".db"

if not os.path.exists(args.dataset):
    print "Dataset '%s' not found" % args.dataset
    sys.exit(-1)

if args.training_metadata is None:
    args.training_metadata = "train/" + args.collection_name + "_metadata.tsv"

if not os.path.exists(args.training_metadata):
    print "Training metadata file '%s' not found" % args.training_metadata
    sys.exit(-1)

if args.test_features is None:
    args.test_features = "test/" + args.collection_name + "_features.tsv"

if not os.path.exists(args.test_features):
    print "Test features file '%s' not found" % args.test_features
    sys.exit(-1)

if args.output is None:
    args.output = "test/" + args.collection_name + "_output.tsv"
print args

# Train on the full training set, then classify the test set with the
# command-line parameters.
autotagger = Autotagger()
autotagger.train(args.dataset, args.training_metadata)
autotagger.classify(args.test_features, args.output, args.metric, args.num_sim, args.threshold, args.ranked_tags)
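
# For reference, a minimal argparse setup matching the attributes used above
# might look like the following sketch (an assumption: the actual parser is
# defined elsewhere and its flags and defaults may differ; the defaults here
# simply mirror the constants used in the cross-validation routine):
#
#   import argparse
#   parser = argparse.ArgumentParser()
#   parser.add_argument('collection_name')
#   parser.add_argument('--dataset')
#   parser.add_argument('--training_metadata')
#   parser.add_argument('--test_features')
#   parser.add_argument('--output')
#   parser.add_argument('--metric', default='LC')
#   parser.add_argument('--num_sim', type=int, default=18)
#   parser.add_argument('--threshold', type=float, default=0.2)
#   parser.add_argument('--ranked_tags', action='store_true')
#   args = parser.parse_args()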