Code example #1
import argparse
from argparse import RawDescriptionHelpFormatter

# Calculator and Cleaner are classes defined elsewhere in this project.


def main():
    description = "Cleans up old backups to leave more room on the backup server." \
                  "\n\nE.g. python cleaner.py -p /path/to/archive -o 3:4 7:7." \
                  "\n\nThe example provided will keep an archive from every 4th day if it's more than 3 days old" \
                  " and archive every 7 days if it's more than a week old." \
                  "\n\nThe format of backups this script takes is BACKUP_SET-VERSION."
    parser = argparse.ArgumentParser(
        description=description, formatter_class=RawDescriptionHelpFormatter)
    parser.add_argument('-p',
                        '--root-path',
                        type=str,
                        required=True,
                        help='The root path of your backups.')
    parser.add_argument(
        '-o',
        '--options',
        type=str,
        required=True,
        nargs='*',
        help='Your age threshold and desired interval size, separated by a colon.'
    )
    parser.add_argument('-f',
                        '--force',
                        action='store_true',
                        help='Automatically confirms that you want to delete.')
    args = parser.parse_args()

    calc = Calculator(args.root_path, args.options, args.force)
    calc.calculate()

    cleaner = Cleaner(calc)
    cleaner.clean()
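The help text says each --options value is an AGE:INTERVAL pair. A minimal sketch of how one such value might be parsed (parse_option is a hypothetical helper, not shown in the project):

def parse_option(option):
    # "3:4" -> (3, 4): keep one backup every 4 days once it is more than 3 days old.
    age_threshold, interval = option.split(':')
    return int(age_threshold), int(interval)


print(parse_option('3:4'))  # (3, 4)
print(parse_option('7:7'))  # (7, 7)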
Code example #2
File: Indexer.py  Project: atbradley/eaccpf-indexer
def run(self):
    """
    Start processing.
    """
    # parse the command line arguments and set logging options
    try:
        self.args = self.parser.parse_args()
        self.configureLogging()
        self.logger.info("Started with {0}".format(' '.join(sys.argv[1:])))
    except Exception as e:
        self.parser.print_help()
        sys.exit(e)
    # load the configuration file
    try:
        with open(self.args.config) as f:
            # readfp is deprecated in Python 3; read_file is the modern equivalent
            self.config.readfp(f)
    except Exception as e:
        self.logger.critical("Could not load the specified configuration file")
        sys.exit(e)
    # set options
    Cfg.LOG_EXC_INFO = self.args.trace
    # execute each requested command, importing its module only when needed
    with Timer.Timer() as t:
        if self.args.crawl:
            import Crawler
            Crawler.crawl(self.config, self.args.update)
        if self.args.clean:
            import Cleaner
            Cleaner.clean(self.config, self.args.update)
        if self.args.infer:
            import Facter
            Facter.infer(self.config, self.args.update)
        if self.args.graph:
            import Grapher
            Grapher.graph(self.config, self.args.update)
        if self.args.transform:
            import Transformer
            Transformer.transform(self.config)
        if self.args.post:
            import Poster
            Poster.post(self.config)
        if self.args.analyze:
            import Analyzer
            Analyzer.analyze(self.config, self.args.update)
    self.logger.info("Indexer finished in {0}:{1}:{2}".format(t.hours, t.minutes, t.seconds))
Code example #3
File: Main.py  Project: ICT-MASTER/IKT441_LAB2_NN
import codecs
import fnmatch
import os
import random
import time
import uuid

import Cleaner  # project module providing clean(); see the stand-in sketch below


def create_training_data():
    print("Loading articles... This may take a while")
    t_start = time.time()
    articles = []
    for root, dirnames, filenames in os.walk('./Articles'):
        for filename in fnmatch.filter(filenames, '*.txt'):
            articles.append(os.path.join(root, filename))
    print("Loading articles complete. Took {0} seconds...".format(time.time() - t_start))

    # Questions

    # Q1: shuffle the article list only on an explicit "y"
    in_random_articles = input("Use random articles? [y/N] ") == "y"
    if in_random_articles:
        random.shuffle(articles)

    # Q2: clean by default; only "n" disables it
    in_clean_file = input("Clean articles? [Y/n] ") != "n"

    # Q3: fall back to 10 when the input is not a number
    try:
        num_articles = int(input("Number of articles? [Default: 10] "))
    except ValueError:
        num_articles = 10

    # Slicing already clamps to len(articles), and the end index is exclusive,
    # so no -1 is needed (the original dropped one article).
    selected_articles = articles[:num_articles]

    os.makedirs("./Training", exist_ok=True)

    training_filename = "Training-{0}-{1}-{2}-{3}.txt".format(
        "Clean" if in_clean_file else "Dirty",
        "Shuffle" if in_random_articles else "Iterate",
        num_articles,
        str(uuid.uuid4())[:8])
    # Open the output file once and append each (optionally cleaned) article.
    with codecs.open("./Training/" + training_filename, "a+", encoding="utf8") as outfile:
        for article in selected_articles:
            with codecs.open(article, 'r', encoding="utf8") as f:
                content = f.read()
                if in_clean_file:
                    content = Cleaner.clean(content)
                outfile.write(content)
    print("Created Training set named: {0}".format(training_filename))
Code example #4
# import the libraries
import csv
import Cleaner
import sys

csv.field_size_limit(sys.maxsize)  # works around the csv field-size overflow on large fields

with open('File_Parsered.csv', 'rt', encoding='utf8') as f, \
        open('/Users/robertopenna/Desktop/Archivio/UNIMIB/Stage/JST-master/data/MR.dat', 'wt', encoding='utf8') as d:
    csv_f = csv.reader(f)
    next(csv_f)  # skip the header row

    for row in csv_f:

        idtweet = row[0]
        string = row[1].lower()
        string_clean = Cleaner.clean(string)
        string_noTW = Cleaner.remove_stopW(string_clean)
        string_fin = string_noTW.replace('é', 'e').replace('ò', 'o').replace('è', 'e').replace('à', 'a').replace('ù', 'u')

        if string_fin != "":
            d.write('Tweet' + idtweet + ' ' + string_fin + '\n')
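The chain of .replace() calls above can be collapsed into a single str.translate pass using the same accent mapping, for example:

# Same accent folding as the chained .replace() calls, in one pass.
ACCENTS = str.maketrans('éòèàù', 'eoeau')
print('città però'.translate(ACCENTS))  # citta pero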
Code example #5
File: analysis.py  Project: SimonHFL/StarWarsSurvey
import pandas as pd
import Cleaner
from sklearn import cross_validation  # removed in modern scikit-learn; model_selection replaces it (unused in this snippet)
from sklearn.ensemble import RandomForestClassifier
import FeatureSelector

# read csv

star_wars = pd.read_csv("star_wars.csv", encoding="ISO-8859-1")

# clean data

star_wars = Cleaner.clean(star_wars)

# split into train and test data
star_wars_train = star_wars[:-200]
star_wars_test = star_wars[-200:]


# Initialize our algorithm with the default parameters
# n_estimators is the number of trees we want to make
# min_samples_split is the minimum number of rows we need to make a split
# min_samples_leaf is the minimum number of samples we can have at the place where a tree branch ends (the bottom points of the tree)
alg = RandomForestClassifier(random_state=1, n_estimators=10, min_samples_split=2, min_samples_leaf=1)

# Set predictors
predictors = ["SeenSW", "IsStarTrekFan", "Gender", "Age", "Income", "Education", "Location"]

# uncomment to check what features to use
# FeatureSelector.check(star_wars, predictors)
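The snippet ends before the model is fitted. A sketch of the next step using the split defined above; the target column name "IsStarWarsFan" is an assumption, since the survey's actual label column is not shown:

# Fit on the training rows and score on the 200 held-out rows.
# "IsStarWarsFan" is a hypothetical target column, not confirmed by the snippet.
alg.fit(star_wars_train[predictors], star_wars_train["IsStarWarsFan"])
accuracy = alg.score(star_wars_test[predictors], star_wars_test["IsStarWarsFan"])
print("Test accuracy: {0:.3f}".format(accuracy))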