Example #1
def text2score(ans_path, pred_path):
    ans_data = Data(ans_path)
    pred_data = Data(pred_path)
    print(
        metrics.flat_classification_report(ans_data.Y,
                                           pred_data.Y,
                                           labels=('I', 'E'),
                                           digits=4))
Example #2
    def __init__(self, verbose=True):
        self.verbose = verbose

        self.log("Loading data...")
        self.words = Data.load_words("most_informative")
        reviews = Data.load_reviews()

        self.log("Feature extraction...")
        data_set = Featuresets.get(reviews, self.words)
        self.train_set = data_set[:3 * len(data_set) // 4]
        self.test_set  = data_set[3 * len(data_set) // 4:]
Example #3
def load_user_document(data: Data, user_slug: str, doc_slug: str):
    """
    Loads a directory of text files into a stored document.

    Doesn't regenerate the document's HTML; it just clears any cached HTML.
    """
    src_dir = fixtures_dir(doc_slug)
    src_dict = load_dir(src_dir)
    data.userDocument_set(user_slug, doc_slug, src_dict, {})
    data.userDocumentCache_delete(user_slug, doc_slug)
    print("Loaded: {} ({:d})".format(doc_slug, len(src_dict)))
Example #4
    def __init__(self, verbose=True):
        self.verbose = verbose

        self.log("Loading data...")
        self.words = Data.load_words("most_informative")
        reviews = Data.load_reviews()

        self.log("Feature extraction...")
        data_set = Featuresets.get(reviews, self.words)
        self.train_set = data_set[:3 * len(data_set) // 4]
        self.test_set = data_set[3 * len(data_set) // 4:]
Example #5
File: base.py  Project: eukras/article-wiki
    def html(self, renderer):

        config = load_env_config()
        data = Data(config)

        articles = []
        for line in self.text.splitlines():
            parts = line.split('/')
            if len(parts) == 2:
                user_slug, doc_slug = parts
                metadata = data.userDocumentMetadata_get(user_slug, doc_slug)
                if metadata:
                    articles += [metadata]

        if len(articles) == 0:
            return ""

        env = Environment(autoescape=False)
        tpl = env.from_string(
            trim("""
            <nav class="article-cards">
            {% for a in articles %}
                <div class="article-card">

                    <a
                        href="/read/{{ a['user'] }}/{{ a['slug'] }}"
                    >
                        <div class="article-card-title balance-text">
                            {{ a['title'] }}
                        </div>
                        <div class="article-card-summary balance-text">
                            {{ a['summary'] }}
                        </div>
                    </a>

                    {% set words = a.word_count | int %}
                    <div class="article-card-details">
                        {{ a['date'] }} &middot; {{ "{:,d}".format(words) }} words
                    </div>
                    <div class="article-card-download">
                        <a 
                            href="/epub/{{ a['user'] }}/{{ a['slug'] }}"
                        >
                            <i class="fa fa-arrow-circle-down" ></i> eBook
                        </a>
                    </div>

                </div>
            {% endfor %}
            </nav>
        """))

        return tpl.render(articles=articles)
Example #6
    def run_siamese(self, train_csv, config):
        data = Data()
        display = Display()
        data.run(train_csv,
                 n_train_samples=config.n_train_samples,
                 n_validation_samples=config.n_validation_samples,
                 embedding_matrix=config.embedding_matrix,
                 max_len=config.max_len,
                 embedding_dim=config.embedding_dim,
                 train_x1=config.train_x1,
                 train_x2=config.train_x2,
                 contrastive=config.contrastive,
                 save_embedding=config.save_embedding,
                 save_train_data=config.save_train_data)
        with tf.Graph().as_default() as graph:
           config.model = config.model(data)
           writer = TensorBoard(graph=graph, logdir=config.logdir).writer
           output, loss, acc, train_summ, valid_summ, opt, merged = config.build_network(graph)
           init = tf.global_variables_initializer()
           with tf.Session(graph=graph) as sess:
               sess.run(init)
               for epoch in range(config.n_epochs):
                 train_iter_ = data.batch_generator(config.batch_size)
                 for batch_idx, batch in enumerate(tqdm(train_iter_)):
                    train_x1_batch, train_x2_batch, train_labels_batch = batch
                    _, batch_train_loss, batch_train_accuracy, batch_train_summary, _, summary = sess.run([output, loss, acc, train_summ, opt, merged], 
                                                                                    feed_dict={
                                                                                                config.model.network.x1 : train_x1_batch,
                                                                                                config.model.network.x2 : train_x2_batch,
                                                                                                config.model.loss.labels : train_labels_batch,
                                                                                                config.model.network.embedding_matrix : data.embedding_matrix
                                                                                              })
                    display.log_train(epoch, batch_idx, batch_train_loss, batch_train_accuracy)
                    writer.add_summary(batch_train_summary, batch_idx)

                    if config.calculate_validation:
                        if batch_idx % 100 == 0:
                            batch_valid_accuracy, batch_valid_summary = sess.run([acc, valid_summ], feed_dict={
                                                                                config.model.network.x1 : data.valid_x1,
                                                                                config.model.network.x2 : data.valid_x2,
                                                                                config.model.loss.labels : data.valid_labels,
                                                                                config.model.network.embedding_matrix : data.embedding_matrix
                                                                                })
                            display.log_validation(epoch, batch_idx, batch_valid_accuracy)
                            writer.add_summary(batch_valid_summary, batch_idx)
                    writer.add_summary(summary, batch_idx)
                    
        display.done()
Example #7
    def __init__(self, llama):
        super(CommandProcessor, self).__init__()
        self.llama = llama
        self.threads = llama.threads
        self.data = Data(self.threads)

        self.built_in_methods = {
            'READ OUT': self.read_out,
            'WRITE IN': self.write_in
        }

        self.command_handlers = {
            'REM': self.rem,
            'NOP': self.nop,
            'COME FROM': self.come_from,
            '<=': self.assign_left,
            '=>': self.assign_right,
            '<-': self.subtract_left,
            '->': self.subtract_right,
            '<+': self.add_left,
            '+>': self.add_right
        }
Example #8
    def read(self, *args, **kwargs):
        if globals.debug > 1: print("gui.read")
        self.data = Data(
            read_file(self.plotcontrols.current_file.get()),
            #rotations=(self.controls.rotation_x.get(),self.controls.rotation_y.get(),self.controls.rotation_z.get()),
        )

        if self.data.is_image:
            return

        # Make sure the data has the required keys for scatter plots
        data_keys = self.data['data'].keys()
        for data_key in ['x', 'y', 'z']:
            if data_key not in data_keys:
                raise ValueError("Could not find required key '" + data_key +
                                 "' in the data from read_file")

        keys = []
        N = len(self.data['data']['x'])
        for key, val in self.data['data'].items():
            if hasattr(val, "__len__"):
                if len(val) == N: keys.append(key)

        # Check for requisite keys for colorbar plots
        values = ['None']
        ckeys = self.data['data'].keys()
        for key in ['x', 'y', 'm', 'h', 'rho']:
            if key not in ckeys: break
        else: values.append("Column density")
        for key in ['x', 'y', 'm', 'h', 'rho', 'opacity']:
            if key not in ckeys: break
        else: values.append("Optical depth")

        # Update the axis controllers
        for axis_name, axis_controller in self.controls.axis_controllers.items():
            if axis_name != 'Colorbar':
                axis_controller.combobox.configure(values=keys)
        self.controls.axis_controllers['Colorbar'].combobox.configure(
            values=values)

        self.initialize_xy_controls()
Example #9
    def __init__(self, llama):
        super(CommandProcessor, self).__init__()
        self.llama = llama
        self.threads = llama.threads
        self.data = Data(self.threads)

        self.built_in_methods = {
            'READ OUT': self.read_out,
            'WRITE IN': self.write_in
        }

        self.command_handlers = {
            'REM': self.rem,
            'NOP': self.nop,
            'COME FROM': self.come_from,
            '<=': self.assign_left,
            '=>': self.assign_right,
            '<-': self.subtract_left,
            '->': self.subtract_right,
            '<+': self.add_left,
            '+>': self.add_right
        }
Example #10
def test_bulk_get_compression(cluster, conf, num_docs, accept_encoding, x_accept_part_encoding, user_agent):

    log.info("Using conf: {}".format(conf))
    log.info("Using num_docs: {}".format(num_docs))
    log.info("Using user_agent: {}".format(user_agent))
    log.info("Using accept_encoding: {}".format(accept_encoding))
    log.info("Using x_accept_part_encoding: {}".format(x_accept_part_encoding))

    mode = cluster.reset(config_path=conf)
    admin = Admin(cluster.sync_gateways[0])

    user = admin.register_user(cluster.sync_gateways[0], "db", "seth", "password", channels=["seth"])

    doc_body = Data.load("mock_users_20k.json")

    with concurrent.futures.ThreadPoolExecutor(max_workers=lib.settings.MAX_REQUEST_WORKERS) as executor:
        futures = [executor.submit(user.add_doc, doc_id="test-{}".format(i), content=doc_body) for i in range(num_docs)]
        for future in concurrent.futures.as_completed(futures):
            try:
                log.info(future.result())
            except Exception as e:
                log.error("Failed to push doc: {}".format(e))

    docs = [{"id": "test-{}".format(i)} for i in range(num_docs)]
    payload = {"docs": docs}

    # Issue curl request and get size of request
    response_size = issue_request(cluster.sync_gateways[0], user_agent, accept_encoding, x_accept_part_encoding, payload)
    log.info("Response size: {}".format(response_size))

    # Verify the response size matches the expected size
    verify_response_size(user_agent, accept_encoding, x_accept_part_encoding, response_size)

    # Verify all sync_gateways are running
    errors = cluster.verify_alive(mode)
    assert len(errors) == 0
Example #11
class Replacer:
    def __init__(self, wb_name, ws_name):
        self.iv = "O"
        self.dv = "X"
        self.data = Data(wb_name, ws_name)
        self.new_dv_list = []
        self.empty_char = 's'

    def strip_all_elements(self, untrimmed_list):
        """strip all elements of a list"""
        for x in range(len(untrimmed_list)):
            untrimmed_list[x] = untrimmed_list[x].strip()
        return untrimmed_list

    def replace_iv_dv(self):
        """if iv in dv, remove it from dv"""
        # get iv and dv columns
        iv_list = self.data.return_column_as_list(self.iv)
        dv_list = self.data.return_column_as_list(self.dv)

        # sanity check that both lists are the same
        if len(iv_list) == len(dv_list):
            # loop through iv's
            for x in range(len(iv_list)):
                # define what cells we are working with
                iv_cell = str(iv_list[x])
                dv_cell = str(dv_list[x])

                # if the cell is empty, don't even bother comparing
                if iv_cell.strip() != "":
                    iv_cell_authors_list = self.strip_all_elements(
                        iv_cell.split(","))
                    dv_cell_authors_list = self.strip_all_elements(
                        dv_cell.split(","))

                    # loop through the authors
                    for untrimmed_iv_author in iv_cell_authors_list:
                        trimmed_iv_author = untrimmed_iv_author.split(
                            "(")[0].strip()
                        # loop through the dv comparing the trimmed author to the dv's authors
                        for untrimmed_dv_author in range(
                                len(dv_cell_authors_list)):
                            if untrimmed_dv_author >= len(
                                    dv_cell_authors_list):
                                break
                            untrimmed_dv_author = dv_cell_authors_list[
                                untrimmed_dv_author]
                            trimmed_dv_author = untrimmed_dv_author.split(
                                "(")[0].strip()
                            if trimmed_dv_author.lower(
                            ) == trimmed_iv_author.lower():
                                while True:
                                    try:
                                        dv_cell_authors_list.remove(
                                            untrimmed_dv_author)
                                    except ValueError:
                                        break
                    if len(dv_cell_authors_list) != 0 and len(
                            dv_cell_authors_list) != 1:
                        new_dv_cell_str = ", ".join(dv_cell_authors_list)
                        if new_dv_cell_str == dv_cell:
                            self.new_dv_list.append("none")
                        else:
                            self.new_dv_list.append(new_dv_cell_str.strip())
                    elif len(dv_cell_authors_list) == 0:
                        self.new_dv_list.append(self.empty_char)
                    elif len(dv_cell_authors_list) == 1:
                        if dv_cell_authors_list[0].strip() == '':
                            self.new_dv_list.append(self.empty_char)
                        else:
                            self.new_dv_list.append(
                                dv_cell_authors_list[0].strip())
                else:
                    self.new_dv_list.append("none")
            # update the cells with their new values
            self.data.update_all_cells_in_column(self.dv, self.new_dv_list)

            # log the changes to the file system
            log = Log_Maker(self.dv, dv_list, self.new_dv_list)
            log.both()

            return self.new_dv_list
        else:
            raise Exception("column " + self.dv +
                            " was not equal in length to column " + self.iv)
Example #12
from lib.wiki.utils import pluralize, trim

HTTP_BAD_REQUEST = 400
HTTP_UNAUTHORIZED = 401
HTTP_NOT_FOUND = 404

HASH = 97586

config = load_env_config()

if "pytest" in sys.modules:
    logging.info("Running in PyTest: Reconfiguring to use test database.")
    config['REDIS_DATABASE'] = config['REDIS_TEST_DATABASE']

# Redis, Jinja
data = Data(config)
views = JinjaTemplates(loader=PackageLoader('app', 'views'),
                       trim_blocks=True,
                       lstrip_blocks=True,
                       keep_trailing_newline=True)

# Sessions
bottleApp = bottle.app()
session_opts = {
    'session.cookie_expires': True,
    'session.encrypt_key': config['COOKIE_SECRET'],
    'session.httponly': True,
    'session.timeout': 3600 * 24,  # 1 day
    'session.type': 'cookie',
    'session.validate_key': True,
}
Example #13
File: main.py  Project: franziz/igfollowers
from lib.instagram.instagram import Instagram
from lib.engine              import Engine
from lib.data                import Data
from tqdm                    import tqdm
import time
import random

if __name__ == "__main__":
    instagram          = Instagram()
    instagram.username = "******"
    instagram.password = "******"
    instagram.login()

    data = Data()
    data = tqdm(data.user_list)
    data.set_description("Crawling users...")
    for datum in data:
      user      = instagram.goto_user(datum["userName"])
      followers = user.followers
      followers = tqdm(followers)

      engine = Engine(datum)
      for follower in followers:
          followers.set_description("[igfollowers][{}] Saving user data...".format(datum["userName"]))
          engine.save(follower)
      random_number = random.randint(10000,50000)/1000
      print("[igfollowers] Sleeping for {}s".format(random_number))
      time.sleep(random_number)

    instagram.quit()
Example #14
def write_epub(user_slug, doc_slug, file_path):

    # Get all the data
    config = load_env_config()
    data = Data(config)

    user = data.user_get(user_slug)  # or None
    if not user:
        raise RuntimeError("User not found: %s", user_slug)

    document = data.userDocument_get(user_slug, doc_slug)  # or Noen
    if not document:
        raise RuntimeError("Document not found: %s" % doc_slug)

    # -------------------------
    # 0. Create book
    # 1. Create cover
    # 2. Create title page
    # 3. Create chapter (which basically is the book)
    #    ... This upgrades to multiple chapters when compiling books.

    # Pre-processing...

    settings = Settings({
        'config:user': user_slug,
        'config:document': doc_slug,
    })
    wiki = Wiki(settings)
    xhtml = wiki.process(user_slug, doc_slug, document)
    metadata = wiki.compile_metadata(config['TIME_ZONE'], user_slug, doc_slug)
    metadata['url'] = '/read/{:s}/{:s}'.format(user_slug, doc_slug)

    title = metadata.get('title', 'Untitled')
    summary = metadata.get('summary', '')
    author = metadata.get('author', 'Anonymous')
    date = metadata.get('date', '')

    # -------------------------
    # 0. CREATE BOOK

    book = epub.EpubBook()

    # set metadata
    book.set_identifier(user_slug + '+' + doc_slug)
    book.set_title(title)
    book.set_language('en')
    book.add_author(author)

    # define CSS style
    with open('static/epub.css') as f:
        style = f.read()
    global_css = epub.EpubItem(uid="style_nav",
                               file_name="style/nav.css",
                               media_type="text/css",
                               content=style)
    book.add_item(global_css)

    # -------------------------
    # 1. Create Cover

    tmp_cover_file = "/tmp/%s-%s-cover.png" % (user_slug, doc_slug)
    image = make_background((1600, 2200), (160, 184, 160))
    cover = make_cover(image, [title, summary, author, date],
                       [COLOR_TEXT, COLOR_SHADOW])
    cover.save(tmp_cover_file, "JPEG")
    chapter_file_name = doc_slug + '.xhtml'

    assert os.path.exists(tmp_cover_file)
    with open(tmp_cover_file, 'rb') as f:
        cover_image = f.read()
    book.set_cover("image.jpg", cover_image)

    # -------------------------
    # 2. Create Title Page

    date_string = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    title_xhtml = """
    <html>
    <body>
        <div>Generated by <i>Article Wiki</i>:</div>
        <div>%s</div>
        <div>&nbsp;</div>
        <div>Permanent URL:</div>
        <div>http://chapman.wiki/read/%s/%s</div>
    </body>
    </html>
    """ % (date_string, user_slug, doc_slug)

    c1 = epub.EpubHtml(title="About this book",
                       file_name="title.xhtml",
                       lang='en')
    c1.content = title_xhtml
    c1.add_item(global_css)
    book.add_item(c1)

    # -------------------------
    # 3. Create Chapter

    c2 = epub.EpubHtml(title=title, file_name=chapter_file_name, lang='en')
    c2.content = xhtml
    c2.add_item(global_css)
    book.add_item(c2)

    # Define Table Of Contents
    book.toc = (
        epub.Link(chapter_file_name, title, doc_slug),
        # (epub.Section(user_slug), (c2))
    )

    # add default NCX and Nav file
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # basic spine
    book.spine = ['nav', c1, c2]

    # write to the file
    epub.write_epub(file_path, book, {})
Example #15
    def predict_file(self, path):
        test_data = Data(path)
        test_data.feature_loader(self.funcs, self.params)
        self.Y_pred = self.clf.predict(test_data.X)
        self.Y_private = test_data.Y
        return self.Y_pred
Example #16
from lib.msplot import *
import easygui
from lib.data import Data, AuxPlots
from lib.plotter import Plotter
import numpy as np

if __name__ == '__main__':
    path = easygui.diropenbox()
    d = Data()

    d.load(path)

    p = Plotter(d)
    print(d.aux_data)

    # if __name__ == '__main__':
    # 	path = easygui.diropenbox()

    # 	s = Spectrum()

    # 	s.load(path, onblast_cutoff=150)
    # 	#s.load("C:\\Users\\ishma\\Dropbox (SteinLab)\\spectra\\(ID     186nm) 2016-07-26-tip15")
    # 	#s.load("../(ID     186nm) 2016-07-26-tip15/")

    p.makeAnimation(window=500,
                    step=20,
                    scan_range=(None, None),
                    mass_range=(None, None),
                    out_name="test.mp4",
                    normalization=SpecNorm.SCAN,
                    label_peaks=True)
Example #17
    IDENTIFIER = args.identifier + utils.datestr()

    POSITIVE_IMAGE_DIR = os.path.join('data/Flickr_2800', args.pos_class)

    # let the user know what we're up to
    log.log("Training " + IDENTIFIER + " FROM " + POSITIVE_IMAGE_DIR)
    if args.transplant is not None:
        log.log("(Starting from " + args.transplant + "_t" + args.lesion_indicator + ")")
    else:
        log.log("(Starting from <blank net, random weights>)")

    # train it all
    with AlexNet() as alexnet:
        # Load data
        log.log("[Loading Data...]")
        all_data = Data()
        all_data.load_images(POSITIVE_IMAGE_DIR, POSITIVE_LABEL)
        all_data.load_images('data/Flickr_2800/notall', NEGATIVE_LABEL)
        train_data, test_data = all_data.split_train_test(train_split=0.90)

        # Initialize net
        if args.transplant is not None:
            load_transplant_dir = os.path.join(TRANSPLANT_DIR, args.transplant + '.ckpt')
            alexnet.load_transplant(load_transplant_dir)

        if args.lesion_indicator != '':
            layers = [i for i, e in enumerate(args.lesion_indicator) if e == '0']
            alexnet.lesion_layers(layers)

        # Train
        log.log("[Training...]")
Example #18
class CommandProcessor(object):

    """docstring for CommandProcessor"""

    def __init__(self, llama):
        super(CommandProcessor, self).__init__()
        self.llama = llama
        self.threads = llama.threads
        self.data = Data(self.threads)

        self.built_in_methods = {
            'READ OUT': self.read_out,
            'WRITE IN': self.write_in
        }

        self.command_handlers = {
            'REM': self.rem,
            'NOP': self.nop,
            'COME FROM': self.come_from,
            '<=': self.assign_left,
            '=>': self.assign_right,
            '<-': self.subtract_left,
            '->': self.subtract_right,
            '<+': self.add_left,
            '+>': self.add_right
        }

    def come_from(self, x, y, thread=0):
        pass

    def check_come_from_value(self, ip, value, thread=0):

        # COME FROM statements that aren't important enough will be
        # ignored.
        if not self.llama.is_important_enough(self.threads.code(thread)[ip].importance):
            return False

        # Get the IP of the statement that is ip_dir from the COME FROM
        # and get whatever value the command has.
        new_ip = self.llama.next_ip(ip)
        compare_value = self.llama.threads.code(thread)[new_ip].command
        compare_value = self.threads.symbol_table(thread).get_from_symbol_table(compare_value)
        return value == compare_value

    def check_come_from(self, value, thread=0):
        # TODO Index COME Froms? - may be fun with a constantly changing
        # code base
#        value = self.symbol_table.get_from_sybmol_table(value)
        for x in range(0, len(self.threads.code(thread))):
            if self.threads.code(thread)[x].command == 'COME FROM':
                if self.check_come_from_value(x, value):
                    # TODO: Multithread here - array of ip or program state?
                    new_thread_id = self.threads.copy(thread)
                    new_ip = self.llama.next_ip(x)
                    new_ip = self.llama.next_ip(new_ip)
                    self.threads.set_ip(new_ip, thread=new_thread_id)
                    # self.threads.set_ip(new_ip, thread=thread)
                    #self.last_value = None # To prevent redoing this
                    self.threads.set_last_value(None, thread=thread) # To prevent redoing this
                    self.threads.set_last_value(None, thread=new_thread_id) # To prevent redoing this
                    self.debug(3, "Executing Come From: Sending IP to %s" % new_ip, thread=thread,
                            ip=self.threads.ip(thread))
                    self.debug(3, "The IP will increment from that point", thread=thread,
                            ip=self.threads.ip(thread))
                    return True
                else:
                    pass
        return False

    def read_out(self, x, thread=0):
        data = None
        target = self.threads.symbol_table(thread).get_from_symbol_table(x)
        if String.is_string(target):
            string_value = String(target)
            data = string_value.decode(target[1:])
        elif Numeric.is_numeric(target):
            numeric_value = Numeric(target)
            data = numeric_value.decode(target[1:])
        elif Boolean.is_boolean(target):
            boolean_value = Boolean(target, classic=self.llama.tight)
            data = boolean_value.decode(target)
        elif Vector.is_vector(target):
            vector_value = Vector(target)
            data = vector_value.decode()
        elif Indicator.is_indicator(target):
            # Indicators cannot be read out
            self.debug(1, "Attempt to read out an indicator: %s" % target, msg_type="WRN", thread=thread,
                    ip=self.threads.ip(thread))

        if data:
            self.debug(4, "Called readout with %s displayed as %s" % \
                    (target, data), thread=thread,
                    ip=self.threads.ip(thread))
            print(data)

    def write_in(self, x, thread=0):
        pass

    def nop(self, x, y, thread=0):
        pass

    def rem(self, x, y, thread=0):
        pass

    def assign_left(self, x, y, thread=0):
        for x_value in x:
            for y_value in y:
                if x_value.command in self.built_in_methods:
                    self.debug(4, "Built in method %s is target of assignment" %  \
                            x_value.command, thread=thread,
                            ip=self.threads.ip(thread))
                    self.built_in_methods[x_value.command](y_value.command, thread=thread)
                else:
                    new_symbol = self.threads.symbol_table(thread).get_new_symbol(y_value.command)
                    symbol = self.threads.symbol_table(thread)
                    self.threads.symbol_table(thread).symbol_table[x_value.command] = y_value.command
                    self.llama.threads.set_last_value(new_symbol, thread=thread)
                    self.debug(3, "Assigned:  \x1b[1;33m%s = %s" % (x_value.command, y_value.command), thread=thread,
                            ip=self.threads.ip(thread))
                    if self.check_come_from(new_symbol):
                        self.debug(3, "COME FROM found after left assignment", thread=thread,
                                ip=self.threads.ip(thread))
                        return

    def assign_right(self, x, y, thread=0):
        return self.assign_left(y, x, thread=thread)

    def subtract_left(self, x, y, thread=0):
        pass

    def subtract_right(self, x, y, thread=0):
        return self.subtract_left(y, x)

    def add_left(self, x, y, thread=0):
        for x_value in x:
            for y_value in y:
                self.data.add(y_value.command, x_value.command)

    def add_right(self, x, y, thread=0):
        return self.add_left(y, x)

    def extract_command_elements(self, command_stack, thread=0):
        state = 0
        arg_0 = []
        commands = []
        arg_1 = []

        for x in command_stack:

            is_command = CommandConstants.is_command(x)
#            is_symbol = self.symbol_table.is_symbol(x)

            if is_command and state == 0:
                state = 1
            elif not is_command and state == 1:
                state = 2

            if state == 0:
                arg_0.append(x)
            elif state == 1:
                commands.append(x)
            elif state == 2:
                arg_1.append(x)

        return (arg_0, commands, arg_1)

    def is_constant(self, value, thread=0):
        if self.threads.symbol_table(thread).is_symbol(value):
            return False

        return String.is_string(value) or Numeric.is_numeric(value) or \
                Boolean.is_boolean(value) or Vector.is_vector(value) or \
                Indicator.is_indicator(value)

    def flatten(self, args):
        new_symbol = [x for x in args if not self.is_constant(x.command)]
        # TODO - betterized flattening - taking into account types?
        new_const = ''.join([x.command for x in args if self.is_constant(x.command)])
        if new_const:
            c = Command()
            c.command = new_const
            new_symbol.append(c)
        return new_symbol

    def process(self, command_stack, thread=0):

        arg_0, commands, arg_1 = \
                self.extract_command_elements(command_stack, thread=thread)

        self.debug(11, "Executing command: %s" % command_stack, thread=thread,
                ip=self.threads.ip(thread))
        self.debug(11, "Raw Arg0:          %s" % arg_0, thread=thread,
                ip=self.threads.ip(thread))
        self.debug(11, "Commands:          %s" % commands, thread=thread,
                ip=self.threads.ip(thread))
        self.debug(11, "Raw Arg1:          %s" % arg_1, thread=thread,
                ip=self.threads.ip(thread))

        # TODO - needs code to flatten constants (auto append)
        arg_0 = self.flatten(arg_0)
        arg_1 = self.flatten(arg_1)

        self.debug(11, "Flattened Arg0:    %s" % arg_0, thread=thread,
                ip=self.threads.ip(thread))
        self.debug(11, "Flattened Arg1:    %s" % arg_1, thread=thread,
                ip=self.threads.ip(thread))

        for c in commands:
            self.debug(11, "Executing %s against %s and %s" % \
                    (c, arg_0, arg_1), thread=thread,
                    ip=self.threads.ip(thread))
            self.command_handlers[c.command](arg_0, arg_1, thread=thread)
Example #19
def update_all_cells_in_column_throws_param2(done):
    # create instance of data class
    data = Data(test_wb_name, ws_name)

    # list of all invalid inputs
    long_list = [
        "im long", "im long", "im long", "im long", "im long", "im long",
        "im long", "im long", "im long", "im long", "im long", "im long",
        "im long", "im long", "im long", "im long", "im long", "im long",
        "im long", "im long", "im long", "im long", "im long", "im long",
        "im long", "im long", "im long", "im long", "im long", "im long",
        "im long", "im long", "im long", "im long", "im long", "im long",
        "im long", "im long", "im long", "im long", "im long", "im long",
        "im long", "im long", "im long", "im long", "im long", "im long",
        "im long", "im long", "im long", "im long", "im long", "im long",
        "im long", "im long", "im long", "im long", "im long", "im long",
        "im long", "im long", "im long", "im long", "im long", "im long",
        "im long", "im long", "im long", "im long", "im long", "im long",
        "im long", "im long", "im long", "im long", "im long", "im long",
        "im long", "im long", "im long", "im long", "im long", "im long",
        "im long", "im long", "im long", "im long", "im long", "im long",
        "im long", "im long", "im long", "im long", "im long", "im long",
        "im long", "im long", "im long", "im long"
    ]
    invalid_inputs = [
        long_list,
        [],
        "a",
        "b",
        1,
        2,
        6,
        243,
        "c",
        "d",
        "e",
        "f",
        "g",
        "h",
        "i",
        "j",
        "k",
        "l",
        "m",
        "n",
        "o",
        "p",
        "q",
        "r",
        "s",
        "t",
        "u",
        "v",
        "w",
        "x",
        "y",
        "z",
        "1",
        "2",
        "3",
        "4",
        "5",
        "6",
        "7",
        "8",
        "9",
        "AB",
        "AA",
        "11",
        "aa",
        "#",
        "$",
        "##A",
        "AS",
    ]
    for invalid_input in invalid_inputs:
        try:
            data.update_all_cells_in_column('A', invalid_input)
            raise AssertionError(
                "update_all_cells_in_column did not throw (it should have) when given input: "
                + str(invalid_input))
        except Exception as e:
            # it will catch the Assertion error
            msg = "update_all_cells_in_column did not throw (it should have) when given input: " + str(
                invalid_input)
            if str(e) == msg:
                raise AssertionError(
                    "update_all_cells_in_column did not throw (it should have) when given input: "
                    + str(invalid_input))
    done(
        "update_all_cells_in_column should throw if the second argument is: empty list, not a list, very long list"
    )
Example #20
from lib.alexnet import AlexNet
from lib.data import Data
from lib.perf import Perf
import lib.utils as utils
import matplotlib.pyplot as plt
import lib.log as log
import numpy as np
import argparse
import os

# constants
IMAGES_DIR = 'data/Flickr_2800'

# actual run
if __name__ == "__main__":
    # parse args out
    parser = argparse.ArgumentParser()
    parser.add_argument("img_class",
                        type=str,
                        help="The directory from Flickr_2800 to plot")
    parser.add_argument("x", type=int, help="Columns")
    parser.add_argument("y", type=int, help="Rows")
    args = parser.parse_args()

    d = Data()
    d.load_images(os.path.join(IMAGES_DIR, args.img_class), None)
    d.graph(args.x, args.y)

    plt.show()
Example #21
    def __init__(self, wb_name, ws_name):
        self.iv = "O"
        self.dv = "X"
        self.data = Data(wb_name, ws_name)
        self.new_dv_list = []
        self.empty_char = 's'
Example #22
from command import load_fixtures
from lib.data import Data, load_env_config
from lib.wiki.utils import trim


# ----------------
# INITIALISATION
# ----------------

config = load_env_config()
if "pytest" in sys.modules:
    logging.info("Running in PyTest: Reconfiguring to use test database.")
    config['REDIS_DATABASE'] = config['REDIS_TEST_DATABASE']

data = Data(config)  # <-- DB 1 for tests

test = AppTester(app.bottleApp)

if config['SINGLE_USER'] != 'YES':
    raise NotImplementedError("v. 0.1.0 does not implement multi-user mode.")


# -------------
# COMMON DATA
# -------------

ADMIN_USER = config['ADMIN_USER']  # <-- v.0.1.0, SINGLE_USER mode

USER_URI = '/user/{:s}'
EDIT_URI = '/edit/{:s}/{:s}/{:s}'
Example #23
from lib.alexnet import AlexNet
from lib.data import Data
from lib.perf import Perf
import lib.utils as utils
import matplotlib.pyplot as plt
import lib.log as log
import numpy as np
import argparse
import os

# constants
IMAGES_DIR = 'data/Flickr_2800'

# actual run
if __name__ == "__main__":
    # parse args out
    parser = argparse.ArgumentParser()
    parser.add_argument("img_class", type=str, help="The directory from Flickr_2800 to plot")
    parser.add_argument("x", type=int, help="Columns")
    parser.add_argument("y", type=int, help="Rows")
    args = parser.parse_args()

    d = Data()
    d.load_images(os.path.join(IMAGES_DIR, args.img_class), None)
    d.graph(args.x, args.y)

    plt.show()
Example #24
class CommandProcessor(object):
    """docstring for CommandProcessor"""
    def __init__(self, llama):
        super(CommandProcessor, self).__init__()
        self.llama = llama
        self.threads = llama.threads
        self.data = Data(self.threads)

        self.built_in_methods = {
            'READ OUT': self.read_out,
            'WRITE IN': self.write_in
        }

        self.command_handlers = {
            'REM': self.rem,
            'NOP': self.nop,
            'COME FROM': self.come_from,
            '<=': self.assign_left,
            '=>': self.assign_right,
            '<-': self.subtract_left,
            '->': self.subtract_right,
            '<+': self.add_left,
            '+>': self.add_right
        }

    def come_from(self, x, y, thread=0):
        pass

    def check_come_from_value(self, ip, value, thread=0):

        # COME FROM statements that aren't important enough will be
        # ignored.
        if not self.llama.is_important_enough(
                self.threads.code(thread)[ip].importance):
            return False

        # Get the IP of the statement that is ip_dir from the COME FROM
        # and get whatever value the command has.
        new_ip = self.llama.next_ip(ip)
        compare_value = self.llama.threads.code(thread)[new_ip].command
        compare_value = self.threads.symbol_table(
            thread).get_from_symbol_table(compare_value)
        return value == compare_value

    def check_come_from(self, value, thread=0):
        # TODO Index COME Froms? - may be fun with a constantly changing
        # code base
        #        value = self.symbol_table.get_from_sybmol_table(value)
        for x in range(0, len(self.threads.code(thread))):
            if self.threads.code(thread)[x].command == 'COME FROM':
                if self.check_come_from_value(x, value):
                    # TODO: Multithread here - array of ip or program state?
                    new_thread_id = self.threads.copy(thread)
                    new_ip = self.llama.next_ip(x)
                    new_ip = self.llama.next_ip(new_ip)
                    self.threads.set_ip(new_ip, thread=new_thread_id)
                    # self.threads.set_ip(new_ip, thread=thread)
                    #self.last_value = None # To prevent redoing this
                    self.threads.set_last_value(
                        None, thread=thread)  # To prevent redoing this
                    self.threads.set_last_value(
                        None, thread=new_thread_id)  # To prevent redoing this
                    self.debug(3,
                               "Executing Come From: Sending IP to %s" %
                               new_ip,
                               thread=thread,
                               ip=self.threads.ip(thread))
                    self.debug(3,
                               "The IP will increment from that point",
                               thread=thread,
                               ip=self.threads.ip(thread))
                    return True
                else:
                    pass
        return False

    def read_out(self, x, thread=0):
        data = None
        target = self.threads.symbol_table(thread).get_from_symbol_table(x)
        if String.is_string(target):
            string_value = String(target)
            data = string_value.decode(target[1:])
        elif Numeric.is_numeric(target):
            numeric_value = Numeric(target)
            data = numeric_value.decode(target[1:])
        elif Boolean.is_boolean(target):
            boolean_value = Boolean(target, classic=self.llama.tight)
            data = boolean_value.decode(target)
        elif Vector.is_vector(target):
            vector_value = Vector(target)
            data = vector_value.decode()
        elif Indicator.is_indicator(target):
            # Indicators cannot be read out
            self.debug(1,
                       "Attempt to read out an indicator: %s" % target,
                       msg_type="WRN",
                       thread=thread,
                       ip=self.threads.ip(thread))

        if data:
            self.debug(4, "Called readout with %s displayed as %s" % \
                    (target, data), thread=thread,
                    ip=self.threads.ip(thread))
            print(data)

    def write_in(self, x, thread=0):
        pass

    def nop(self, x, y, thread=0):
        pass

    def rem(self, x, y, thread=0):
        pass

    def assign_left(self, x, y, thread=0):
        for x_value in x:
            for y_value in y:
                if x_value.command in self.built_in_methods:
                    self.debug(4, "Built in method %s is target of assignment" %  \
                            x_value.command, thread=thread,
                            ip=self.threads.ip(thread))
                    self.built_in_methods[x_value.command](y_value.command,
                                                           thread=thread)
                else:
                    new_symbol = self.threads.symbol_table(
                        thread).get_new_symbol(y_value.command)
                    symbol = self.threads.symbol_table(thread)
                    self.threads.symbol_table(thread).symbol_table[
                        x_value.command] = y_value.command
                    self.llama.threads.set_last_value(new_symbol,
                                                      thread=thread)
                    self.debug(3,
                               "Assigned:  \x1b[1;33m%s = %s" %
                               (x_value.command, y_value.command),
                               thread=thread,
                               ip=self.threads.ip(thread))
                    if self.check_come_from(new_symbol):
                        self.debug(3,
                                   "COME FROM found after left assignment",
                                   thread=thread,
                                   ip=self.threads.ip(thread))
                        return

    def assign_right(self, x, y, thread=0):
        return self.assign_left(y, x, thread=thread)

    def subtract_left(self, x, y, thread=0):
        pass

    def subtract_right(self, x, y, thread=0):
        return self.subtract_left(y, x)

    def add_left(self, x, y, thread=0):
        for x_value in x:
            for y_value in y:
                self.data.add(y_value.command, x_value.command)

    def add_right(self, x, y, thread=0):
        return self.add_left(y, x)

    def extract_command_elements(self, command_stack, thread=0):
        state = 0
        arg_0 = []
        commands = []
        arg_1 = []

        for x in command_stack:

            is_command = CommandConstants.is_command(x)
            #            is_symbol = self.symbol_table.is_symbol(x)

            if is_command and state == 0:
                state = 1
            elif not is_command and state == 1:
                state = 2

            if state == 0:
                arg_0.append(x)
            elif state == 1:
                commands.append(x)
            elif state == 2:
                arg_1.append(x)

        return (arg_0, commands, arg_1)

    def is_constant(self, value, thread=0):
        if self.threads.symbol_table(thread).is_symbol(value):
            return False

        return String.is_string(value) or Numeric.is_numeric(value) or \
                Boolean.is_boolean(value) or Vector.is_vector(value) or \
                Indicator.is_indicator(value)

    def flatten(self, args):
        new_symbol = [x for x in args if not self.is_constant(x.command)]
        # TODO - betterized flattening - taking into account types?
        new_const = ''.join(
            [x.command for x in args if self.is_constant(x.command)])
        if new_const:
            c = Command()
            c.command = new_const
            new_symbol.append(c)
        return new_symbol

    def process(self, command_stack, thread=0):

        arg_0, commands, arg_1 = \
                self.extract_command_elements(command_stack, thread=thread)

        self.debug(11,
                   "Executing command: %s" % command_stack,
                   thread=thread,
                   ip=self.threads.ip(thread))
        self.debug(11,
                   "Raw Arg0:          %s" % arg_0,
                   thread=thread,
                   ip=self.threads.ip(thread))
        self.debug(11,
                   "Commands:          %s" % commands,
                   thread=thread,
                   ip=self.threads.ip(thread))
        self.debug(11,
                   "Raw Arg1:          %s" % arg_1,
                   thread=thread,
                   ip=self.threads.ip(thread))

        # TODO - needs code to flatten constants (auto append)
        arg_0 = self.flatten(arg_0)
        arg_1 = self.flatten(arg_1)

        self.debug(11,
                   "Flattened Arg0:    %s" % arg_0,
                   thread=thread,
                   ip=self.threads.ip(thread))
        self.debug(11,
                   "Flattened Arg1:    %s" % arg_1,
                   thread=thread,
                   ip=self.threads.ip(thread))

        for c in commands:
            self.debug(11, "Executing %s against %s and %s" % \
                    (c, arg_0, arg_1), thread=thread,
                    ip=self.threads.ip(thread))
            self.command_handlers[c.command](arg_0, arg_1, thread=thread)
Example #25
import sys
from lib.data import Data

#print(int(sys.argv[1]))

try:
    # Generate an image of the requested size; fall back to 128 px if 0 is given.
    size = int(sys.argv[1])
    if size == 0:
        size = 128
    print("Generating Image size ({0}x{0})px".format(size))
    Data().gen(size)
except (IndexError, ValueError):
    print("Please pass correct image size")
Example #26
def test1(done):
    data = Data(test_wb_name, ws_name)

    # write to column b
    expected_outcome = [
        "thedinner", "cool", "me", "tester", "please", "work", "im", "begging",
        'you'
    ]

    # try to update the cells make sure to catch all errors
    try:
        data.update_all_cells_in_column("B", expected_outcome)
    except Exception as e:
        raise AssertionError(str(e))

    # read column b make sure that you get the expected outcome
    outcome = data.return_column_as_list("B")
    msg = str(outcome[0:9]) + " was not equal to " + str(expected_outcome)
    assert outcome[0:9] == expected_outcome, msg

    # write to column b with some none values
    new_cells = [
        'does', 'none', 'none', 'none', 'none', 'none', 'none', 'none', 'work?'
    ]
    expected_outcome = [
        "does", "cool", "me", "tester", "please", "work", "im", "begging",
        'work?'
    ]

    # try to update the cells make sure to catch all errors
    try:
        data.update_all_cells_in_column("B", new_cells)
    except Exception as e:
        raise AssertionError(str(e))

    # read column b make sure that you get the expected outcome
    outcome = data.return_column_as_list("B")
    msg = str(outcome[0:9]) + " was not equal to " + str(expected_outcome)
    assert outcome[0:9] == expected_outcome, msg

    # reset column b to empty
    empty_list = ["", "", "", "", "", "", "", "", "",
                  ""]  #TODO make this more robust
    try:
        data.update_all_cells_in_column('B', empty_list)
    except Exception as e:
        raise AssertionError(str(e))

    # read column b assert that its empty
    outcome = data.return_column_as_list("B")
    msg = str(outcome) + " was not equal to " + str(empty_list)
    assert outcome == empty_list, msg

    done(
        "Series of reads and writes to excel file. The reads make sure that the writes actually worked."
    )
Example #27
from lib.instagram.instagram import Instagram
from lib.engine import Engine
from lib.data import Data
from tqdm import tqdm
import time
import random

if __name__ == "__main__":
    instagram = Instagram()
    instagram.username = "******"
    instagram.password = "******"
    instagram.login()

    data = Data()
    data = tqdm(data.user_list)
    data.set_description("Crawling users...")
    for datum in data:
        user = instagram.goto_user(datum["userName"])
        followers = user.followers
        followers = tqdm(followers)

        engine = Engine(datum)
        for follower in followers:
            followers.set_description(
                "[igfollowers][{}] Saving user data...".format(
                    datum["userName"]))
            engine.save(follower)
        random_number = random.randint(10000, 50000) / 1000
        print("[igfollowers] Sleeping for {}s".format(random_number))
        time.sleep(random_number)
Example #28
"""
Test the redis interface for user and docs handling.
"""

import os

from lib.data import Data
from lib.ebook import write_epub

config = {
    'REDIS_HOST': 'localhost',
    'REDIS_PORT': 6379,
    'REDIS_DATABASE': 1,  # <-- TESTING
    'ADMIN_USER': '******',
    'TIME_ZONE': 'Australia/Sydney',
}

data = Data(config, strict=True)


def test_write_epub():
    """
    Create a hash, find its key, delete it.
    """

    file_path = '/tmp/eukras-help.epub'
    if os.path.exists(file_path):
        os.remove(file_path)

    write_epub('eukras', 'help', file_path)
    assert os.path.exists(file_path)
Example #29
def update_all_cells_in_column_throws(done):
    # create instance of data class
    data = Data(test_wb_name, ws_name)

    # list of all invalid inputs
    invalid_inputs = [
        "a",
        "b",
        "c",
        "d",
        "e",
        "f",
        "g",
        "h",
        "i",
        "j",
        "k",
        "l",
        "m",
        "n",
        "o",
        "p",
        "q",
        "r",
        "s",
        "t",
        "u",
        "v",
        "w",
        "x",
        "y",
        "z",
        "1",
        "2",
        "3",
        "4",
        "5",
        "6",
        "7",
        "8",
        1,
        2,
        3,
        243,
        "9",
        "AB",
        "AA",
        "11",
        "aa",
        "#",
        "$",
        "##A",
        "AS",
    ]
    for invalid_input in invalid_inputs:
        try:
            data.update_all_cells_in_column(invalid_input, ["tester"])
            raise AssertionError(
                "update_all_cells_in_column did not throw (it should have) when given input: "
                + str(invalid_input))
        except Exception as e:
            # it will catch the Assertion error
            msg = "update_all_cells_in_column did not throw (it should have) when given input: " + str(
                invalid_input)
            if str(e) == msg:
                raise AssertionError(
                    "update_all_cells_in_column did not throw (it should have) when given input: "
                    + str(invalid_input))
    done(
        "update_all_cells_in_column should throw if it is passed anything but a single capital-letter column name"
    )
Example #30
def main():
    # Parse input arguments and store in config_data
    config_data = setup_config()

    graph_mode = config_data['graph_mode']
    if graph_mode == GRAPH:
        from lib.draw import draw_rank
        from lib.draw import manhattan
        from lib.draw import manhattan_all

    # Parse json files from Training folder and Testing folder
    logger.info("Parse training json files from %s", config_data['train_path'])
    train_path = config_data['train_path']
    train_pickle = config_data['train_pickle']
    train_file = config_data['train_file']
    train_label = config_data['train_label']
    if not train_pickle:
        parse_json(train_path, train_file)

    mode = config_data['mode']
    test_path = config_data['test_path']
    test_file = config_data['test_file']
    running_mode = config_data['running_mode']
    output_path = config_data['output_path']

    if mode == TEST_MODE:
        if running_mode == NORMAL_MODE:
            logger.info("Parse testing json files from %s", test_path)
            parse_json(test_path, test_file)
        else:
            logger.info("Parse testing json file from stdin")
            parse_json_stdin(test_file)

    # Load training data and testing data
    train_data = Data()
    filter_feature = config_data['filter_feature']
    if train_pickle:
        with open(train_pickle, 'rb') as f:
            train_data = pickle.load(f)
    else:
        train_data.loadData(train_file, filter_feature)

    # Train classifier by training set and test on testing set
    # Return pedia which contain pedia score, label and gene id
    # We can add filter_feature to remove the feature we don't want
    # to be trained by the classifier. For example
    # filter_feature = [FM_IDX, GESTALT_IDX]
    if mode == TEST_MODE:
        train = train_data.data

        test_data = Data()
        test_data.loadData(test_file, filter_feature)
        test = test_data.data
        pedia = classify_test(train, test, output_path, config_data)

        rank(pedia, output_path)
        if graph_mode == GRAPH:
            for case in pedia:
                manhattan(pedia, output_path, config_data['pos_file'], case)
            manhattan_all(pedia, output_path, config_data['pos_file'])

    elif mode == LOOCV_MODE:
        ite = 0
        train = train_data.data
        pedia = classify_loocv(train, output_path, config_data, ite + 1)

        rank(pedia, output_path)
        if graph_mode == GRAPH:
            for case in pedia:
                manhattan(pedia, output_path, config_data['pos_file'], case)
    elif mode == PARAM_TEST_MODE:
        for ite in range(config_data['cv_rep']):
            logger.info("Start CV repetition %d", ite + 1)
            path = output_path + "/cv_" + str(ite)
            if not os.path.exists(path):
                os.makedirs(path)
            train = train_data.data
            pedia = classify_cv_tuning_test(train, path, config_data)

            rank_tuning(train_label, path, config_data)
            if graph_mode == GRAPH:
                for case in pedia:
                    manhattan(pedia, path, config_data['pos_file'], case)
                manhattan_all(pedia, path, config_data['pos_file'])
        rank_all_cv_tuning(train_label, output_path, config_data['cv_rep'])
    else:
        for ite in range(config_data['cv_rep']):
            logger.info("Start CV repetition %d", ite + 1)
            path = output_path + "/cv_" + str(ite)
            if not os.path.exists(path):
                os.makedirs(path)
            train = train_data.data
            pedia = classify_cv(train, path, config_data, ite + 1)

            rank(pedia, path)
            if graph_mode == GRAPH:
                for case in pedia:
                    manhattan(pedia, path, config_data['pos_file'], case)
                manhattan_all(pedia, path, config_data['pos_file'])
        rank_all_cv(train_label, output_path, config_data['cv_rep'])
Example #31
def get_redis_client() -> Data:
    """Initialises a Redis client from environment variables."""
    config = load_env_config()
    if "pytest" in sys.modules:
        config['REDIS_DATABASE'] = '1'
    return Data(config)
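For context, the client returned above exposes the accessors seen throughout these examples (user_get, userDocument_get, userDocumentMetadata_get). A minimal, hypothetical usage sketch; the slugs are placeholders, not guaranteed fixtures:

# Hypothetical usage of get_redis_client(); assumes the same Data API as the other snippets on this page.
data = get_redis_client()
document = data.userDocument_get('eukras', 'help')
if document:
    print("Loaded {:d} part(s)".format(len(document)))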
Example #32
def run():
    cipher = AES.new(bytes(key), AES.MODE_ECB)
    out1 = cipher.decrypt(bytes(in1))
    out2 = Data(out1[0:-out1[-1]])  # Remove padding
    print(out2.toascii())
Example #33
def save_user_document(data: Data, user_slug: str, doc_slug: str):
    """Saves a stored document into a directory as text files."""
    dst_dir = fixtures_dir(doc_slug)
    dst_dict = data.userDocument_get(user_slug, doc_slug)
    save_dir(dst_dir, dst_dict)
    print("Saved: {} ({:d} files)".format(doc_slug, len(dst_dict)))
Example #34
def main():
    """ The main routine. """

    # Fix random seeds for reproducibility - these are themselves generated from random.org
    # From https://keras.io/getting-started/faq/#how-can-i-obtain-reproducible-results-using-keras-during-development
    os.environ['PYTHONHASHSEED'] = '0'
    np.random.seed(91)
    rn.seed(95)
    session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
                                  inter_op_parallelism_threads=1)
    tf.set_random_seed(47)
    sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
    k.set_session(sess)

    # Enable simple logging
    logging.basicConfig(level=logging.INFO, format='%(message)s')

    # Parse command line arguments
    args = parseargs()

    # Create run folder
    output_directory = create_output_folder(args.output)

    # Write arguments to file
    with open(output_directory + 'arguments.txt', 'a') as arguments_file:
        for arg in vars(args):
            arguments_file.write(
                str(arg) + ': ' + str(getattr(args, arg)) + '\n')

    ##############
    # Prepare data
    print('')
    data = Data(incidences_file=args.incidences,
                specifications_file=args.specifications,
                plot_data=args.plotData,
                output_directory=output_directory)
    data.state(message='Raw data')

    data.filter_cases(cases_file=args.cases)
    data.state(message='Filtered SEER*Stat cases from ASCII')

    # Determine inputs, filter, and pre process them
    data.apply_data_pipeline(pipelines.data_pipeline_full, args.oneHotEncoding)
    data.state(
        message=
        'Remove irrelevant, combined, post-diagnosis, and treatment attributes'
    )

    data.create_target(args.task)
    data.state(message='Create target label indicating cancer survival for ' +
               args.task)

    encodings = data.finalize()
    data.state(message='Remove inputs with constant values')

    ###############
    # Prepare model
    model = Model(model_type=args.model,
                  task=args.task,
                  input_dim=(len(data.frame.columns) - 1),
                  encodings=encodings,
                  mlp_layers=args.mlpLayers,
                  mlp_width=args.mlpWidth,
                  mlp_dropout=args.mlpDropout,
                  mlp_emb_neurons=args.mlpEmbNeurons,
                  svm_gamma=args.svmGamma,
                  svm_c=args.svmC,
                  logr_c=args.logrC)

    if args.plotData:
        model.plot_model(output_directory)

    ################
    # Carry out task
    experiment = Experiment(model=model,
                            data=data,
                            task=args.task,
                            valid_ratio=0.1,
                            test_ratio=0.1,
                            model_type=args.model,
                            encodings=encodings,
                            encode_categorical_inputs=args.oneHotEncoding,
                            plot_results=args.plotResults,
                            output_directory=output_directory)

    experiment.train(mlp_epochs=args.mlpEpochs)

    results_validate = experiment.validate()
    # Write validation results to file
    with open(output_directory + 'results_validate.txt', 'a') as results_file:
        for res in results_validate:
            results_file.write(res + '\n')

    # Only test final model, do not use for tuning
    if args.test:
        results_test = experiment.test()
        # Write validation results to file
        with open(output_directory + 'results_test.txt', 'a') as results_file:
            for res in results_test:
                results_file.write(res + '\n')

    ###################
    # Input importance
    if args.importance:
        importance = experiment.importance(encodings=encodings)
        # Write importance results to file
        with open(output_directory + 'results_importance.txt',
                  'a') as results_file:
            for (column, rel) in importance:
                results_file.write(column + '=' + str(rel) + '\n')