Example #1
def main():
    args = parser.parse_args()
    modify_arguments(args)

    # setting random seeds
    torch.cuda.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

    with open(args.config_file, 'r') as stream:
        config = yaml.safe_load(stream)
        args.config = Munch(modify_config(args, config))
    logger.info(args)

    if args.mode == 'train':
        train.train(args, device)
    elif args.mode == 'test':
        pass
    elif args.mode == 'analysis':
        analysis.analyze(args, device)
    elif args.mode == 'generate':
        pass
    elif args.mode == 'classify':
        analysis.classify(args, device)
    elif args.mode == 'classify_coqa':
        analysis.classify_coqa(args, device)
    elif args.mode == 'classify_final':
        analysis.classify_final(args, device)
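The snippet assumes a module-level argparse parser plus helpers such as modify_arguments and modify_config defined elsewhere in the project. A minimal sketch of the argument setup it implies (the option names below are inferred from the code above, not taken from the real project):

import argparse

# Hypothetical parser covering only the options the example clearly uses.
parser = argparse.ArgumentParser(description="Run training/analysis modes")
parser.add_argument("--mode",
                    choices=["train", "test", "analysis", "generate",
                             "classify", "classify_coqa", "classify_final"],
                    default="train")
parser.add_argument("--config_file", default="config.yaml")
parser.add_argument("--seed", type=int, default=42)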
Example #2
def main():
    init()
    # https://www.scrapehero.com/how-to-prevent-getting-blacklisted-while-scraping/
    # https://www.nasdaq.com/symbol/aapl/revenue-eps
    date_range_pull('2019-Aug-31', '2019-Sep-18')
    # daily_pull()
    analysis.analyze(config)
Example #3
def main():
    # Small
    SETTINGS = {
        "grid_size": 100,
        "min_block_size": 1,
        "dot_skip_rate": 1,
        "dotsize": 0.1,
        "fontsize": 10,
        "figsize": (10, 7),

        "min_event_size": 3,
        "lines_join_size": 5,
        "line_min_size": 10
    }

    # for foldername in os.listdir(mkpath(ROOT_PATH, "output", "analysis", "small")):
    #     analysis.analyze(
    #         name=foldername,
    #         query_genome_path="samples/small/source.fasta",
    #         ref_genome_path="samples/small/{}.fasta".format(foldername),
    #         segments_file_path="BWA/small/{}/bwa_output.sam".format(foldername),
    #         show_plot=False,
    #         output_folder="output/analysis/small/{}".format(foldername),
    #         settings=SETTINGS.copy()
    #     )

    # Large
    SETTINGS = {
        "grid_size": int(1e5),
        "min_block_size": int(1e3),
        "dot_skip_rate": 10,
        "dotsize": 0.1,
        "fontsize": 8,
        "figsize": (10, 7),

        "min_event_size": int(1e4),
        "lines_join_size": "$min_event_size + 3",
        "line_min_size": "$min_event_size"
    }

    for foldername in os.listdir(mkpath(ROOT_PATH, "output", "analysis")):
        if not os.path.isdir(mkpath(ROOT_PATH, "output", "analysis", foldername)) or foldername.strip("/").strip("\\") == "small":
            continue

        if int(foldername.strip("/").strip("\\").lstrip("large")) > 6:
            continue

        analysis.analyze(
            name=foldername,
            query_genome_path="samples/{}/large_genome1.fasta".format(foldername),
            ref_genome_path="samples/{}/large_genome2.fasta".format(foldername),
            # segments_file_path="BWA/{}/bwa_output.sam".format(foldername),
            segments_file_path="{}".format(analysis.segment_paths[foldername]),
            show_plot=False,
            output_folder="output/analysis/{}".format(foldername),
            settings=SETTINGS.copy()
        )
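The "$min_event_size"-style values above are string references that are presumably resolved against the other settings before use. A purely illustrative resolver sketch (not the project's actual logic) might look like this:

def resolve_settings(settings):
    # Illustrative only: resolve "$name" and "$name + N" references against
    # already-numeric entries; how analysis.analyze() really handles them
    # is not shown in this example.
    resolved = dict(settings)
    for key, value in settings.items():
        if isinstance(value, str) and value.startswith("$"):
            name, _, offset = value[1:].partition("+")
            base = resolved[name.strip()]
            resolved[key] = base + int(offset) if offset else base
    return resolved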
Example #4
 def run(self):
     global data_analyzed
     print("Starting playback thread " + str(self.thread_id) + "...")
     play(self.q, self.thread_id)
     print("Playback thread " + str(self.thread_id) + " done!")
     if data_analyzed == 0:
         analysis.analyze()
         data_analyzed = 2
         print("Timing data analyzed by thread " + str(self.thread_id))
     else:
         data_analyzed = data_analyzed - 1
Example #5
def main():
    SETTINGS = {
        "grid_size": int(1e5),
        "min_block_size": int(1e3),
        "dot_skip_rate": 10,
        "dotsize": 0.1,
        "fontsize": 8,
        "figsize": (10, 7),
        "min_event_size": int(1e4),
        "lines_join_size": "$min_event_size + 3",
        "line_min_size": "$min_event_size"
    }

    # for foldername in os.listdir(mkpath(ROOT_PATH, "output", "analysis")):
    #     if not os.path.isdir(mkpath(ROOT_PATH, "output", "analysis", foldername)) or foldername.strip("/").strip("\\") == "small":
    #         continue

    #     if int(foldername.strip("/").strip("\\").lstrip("large")) > 7:
    #         continue

    #     analysis.analyze(
    #         name=foldername,
    #         query_genome_path="samples/{}/large_genome1.fasta".format(foldername),
    #         ref_genome_path="samples/{}/large_genome2.fasta".format(foldername),
    #         # segments_file_path="BWA/{}/bwa_output.sam".format(foldername),
    #         segments_file_path="{}".format(analysis.segment_paths[foldername]),
    #         show_plot=False,
    #         output_folder="output/analysis/{}".format(foldername),
    #         settings=SETTINGS.copy()
    #     )

    for pair_name in os.listdir(
            mkpath(ROOT_PATH, "output", "alignment_grouptest")):
        first_genome_name, second_genome_name, _ = pair_name.split(".fasta_")
        # print(first_genome_name, second_genome_name)

        first_genome_path = mkpath("samples", "grouptest",
                                   first_genome_name + ".fasta")
        second_genome_path = mkpath("samples", "grouptest",
                                    second_genome_name + ".fasta")

        analysis.analyze(name=pair_name,
                         query_genome_path=first_genome_path,
                         ref_genome_path=second_genome_path,
                         segments_file_path=mkpath("output",
                                                   "alignment_grouptest",
                                                   pair_name),
                         show_plot=False,
                         output_folder=mkpath("output", "analysis_grouptest",
                                              pair_name),
                         settings=SETTINGS.copy())
Example #6
def main():
  # service = getOrCreateService()

  if not path.exists(csvFolder):
    # Query files from drive
    # https://drive.google.com/drive/u/0/folders/16SQCIy97DRDsRAhpKBilbMh8hlazQAhZ
    driveFolderId = '16SQCIy97DRDsRAhpKBilbMh8hlazQAhZ'
    files = getFilesInFolder(driveFolderId, mimeType='application/vnd.google-apps.spreadsheet')

    # export files
    exportFiles(files)

  # do the analysis
  analyze(csvFolder)
Example #7
def run(phonemes, words):
    """Driver to call parsing and comparing functions."""
    phoneme1, phoneme2 = parse_phoneme(phonemes)

    # print(phoneme1)
    # print(phoneme2)
    # print(words)
    # print('Target phonemes: [' + phoneme1 + '], [' + phoneme2 + ']')
    # print('Data set: ')
    # print(words)
    # for each word in words, return word-trans pair
    # wt_pairs = [Word(w) for w in words]
    # for w in wt_pairs: print(w)

    transcribed_words = [Word(w) for w in words]
    analyze(phoneme1, phoneme2, transcribed_words)
Example #8
def upload_file():
    if request.method == 'POST':
        saved_files = {}
        for file_name in ['buildings_file', 'central_file', 'schedule_file']:
            # check if the post request has the file part
            if file_name not in request.files:
                flash('No ' + file_name + ' part')
                return redirect(request.url)

            file = request.files[file_name]
            # if the user does not select a file, the browser may also
            # submit an empty part without a filename
            if file.filename == '':
                flash('No selected file')
                return redirect(request.url)
            if file and allowed_file(file.filename):
                filename = secure_filename(file.filename)
                file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
                saved_files[file_name] = file_path
                file.save(file_path)

        intervals_dec, buildings, buildings_seat, building_codes = \
            analyze(saved_files['buildings_file'], saved_files['central_file'], saved_files['schedule_file'])

        dict_buildings = {code: building.asdict() for code, building in buildings.items()}
        return render_template('filter.html', intervals_dec=intervals_dec, buildings=dict_buildings, building_codes=building_codes)
    return render_template('submit_files.html')
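allowed_file is not shown in this example; a plausible definition following the standard Flask upload recipe (the extension whitelist here is an assumption):

ALLOWED_EXTENSIONS = {'csv', 'xlsx'}  # assumed; the real project may differ

def allowed_file(filename):
    # Accept only filenames with an allowed extension.
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS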
Example #9
def setup(trackid):
    global dd_snds, dd_tabs, dd_seq, dd_amp, beat_space, a, rm, patch_rhythm, patch_harmony, d_seq, d_func
    # play beat click
    dd_snds = ['../wav/alum3.wav']
    dd_tabs = SndTable(dd_snds)
    dd_seq = Seq(time=1, seq=[0], onlyonce=True)
    dd_amp = TrigEnv(dd_seq, dd_tabs, dur=.25, mul=.25).out()
    a = an.analyze(trackid)
    a = an.pick_events(a, [['action', ['start', 'stop']],
                           ['type', ['beats', 'sections', 'segments']]])
    beat_space = list(np.diff(np.array([e['time'] for e in a]))) + [0.0]
    # Seq seems to ignore the last value in its seq list - needed to add this dummy 0 at end to get all beats

    # channel 1 is rhythm
    # channel 2 is harmony

    s.programout(patch_rhythm, 1)

    an.show(an.GM_patches[patch_rhythm])

    s.programout(patch_harmony, 2)
    an.show(an.GM_patches[patch_harmony])
    an.show(str(len(beat_space)) + ' beats')
    d_seq = Seq(time=1, seq=beat_space, onlyonce=True)
    d_func = TrigFunc(d_seq, seq_callback, 'sequence')
    rm = RawMidi(seq_callback)
Example #10
    def search(self, query, search_type='AND', rank=False):
        """
        Search; this will return documents that contain words from the query,
        and rank them if requested (sets are fast, but unordered).

        Parameters:
          - query: the query string
          - search_type: ('AND', 'OR') do all query terms have to match, or just one
          - rank: (True, False) if True, rank results based on TF-IDF score
        """
        if search_type not in ('AND', 'OR'):
            return []

        analyzed_query = analyze(query)
        # print(analyzed_query)
        results = self._results(analyzed_query)
        if search_type == 'AND':
            # all tokens must be in the document
            documents = [
                self.documents[doc_id] for doc_id in set.intersection(*results)
            ]
        if search_type == 'OR':
            # only one token has to be in the document
            documents = [
                self.documents[doc_id] for doc_id in set.union(*results)
            ]

        if rank:
            return self.rank(analyzed_query, documents)
        ids = [document.ID for document in documents]
        # return documents
        return ids
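The AND/OR branch above is plain set algebra over the per-token posting sets returned by self._results. A tiny self-contained illustration of that logic (the index and IDs below are made up for demonstration only):

index = {
    "python": {1, 2},
    "search": {2, 3},
}
query_tokens = ["python", "search"]
results = [index.get(tok, set()) for tok in query_tokens]

print(set.intersection(*results))  # AND -> {2}
print(set.union(*results))         # OR  -> {1, 2, 3}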
Example #11
async def handle_echo(reader, writer):
    data = await reader.read(BUFMAX)
    message = data.decode()
    analysis.analyze()
    # addr = writer.get_extra_info('peername')

    # print(f"Received {message!r} from {addr!r}")

    # print(f"Send: {message!r}")
    # with open('data2.csv', 'wb') as f:
    #     f.write(data)
    # writer.write(data)
    # await writer.drain()

    print("Close the connection")
    writer.close()
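The coroutine above is only the connection handler; a minimal sketch of how it is typically wired into an asyncio server (the host, port, and BUFMAX value are assumptions):

import asyncio

BUFMAX = 65536  # assumed; the original defines this elsewhere

async def main():
    server = await asyncio.start_server(handle_echo, "127.0.0.1", 8888)
    async with server:
        await server.serve_forever()

if __name__ == "__main__":
    asyncio.run(main())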
Example #12
def analyze(bot, update, args):
    """
        Analyzes the sentiment of a single input message or of the last 10
        messages from the user who called this function. By sending 'me', a
        user can see their average sentiment score over their last 10 messages.
    """
    global client
    message = ' '.join(args)
    if (len(message.split()) == 1 and message.lower() == 'me'):
        user = update.message.from_user
        user_id = user.id
        chat_id = update.message.chat_id
        if chat_id in groups.keys():
            cur_group = groups[chat_id]
            if user_id in cur_group.keys():
                messages = cur_group[user_id]
                sentiment = analysis.get_average_sentiment(messages, client)
                avg_score = "%.4f" % sentiment.avg_score
                message = "{} has had an average sentiment score of {} for their last 10 messages"\
                                .format(user.first_name, avg_score)
                bot.send_message(chat_id=update.message.chat_id, text=message)
            else:
                message = "I don't have any messages stored from {} yet"\
                                .format(user.first_name)
                bot.send_message(chat_id=update.message.chat_id, text=message)
        else:
            message = "I don't have any messages stored from this chat yet"
            bot.send_message(chat_id=update.message.chat_id, text=message)
    else:
        score = "%.4f" % analysis.analyze(message, client).score
        message = "Your message of '" + message + "' yielded a score of: " + score
        bot.send_message(chat_id=update.message.chat_id, text=message)
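The (bot, update, args) signature suggests an older python-telegram-bot release. A hedged sketch of how such a command handler might be registered (the token placeholder and library version are assumptions):

from telegram.ext import Updater, CommandHandler

# Sketch for a pre-v12-style callback; newer, context-based releases differ.
updater = Updater("TELEGRAM_BOT_TOKEN")  # token placeholder
updater.dispatcher.add_handler(
    CommandHandler("analyze", analyze, pass_args=True))
updater.start_polling()
updater.idle()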
Example #13
def readm0():
	bar = progressbar.ProgressBar()
	data = tools.loaddir("/home/aditya/Desktop/project/aclImdb/train/pos/")
	data.extend(tools.loaddir("/home/aditya/Desktop/project/aclImdb/train/neg/"))
	regexp = analysis._re_analysis()
	db = memory.recollect()
	return [analysis.analyze(open(w[0]).read(), db=db, regexp=regexp) for w in bar(data)]
Example #14
def deh120():
    saved_files = {}
    for file_name in [('buildings_file','building_abbreviations.csv'), ('central_file','centrally_scheduled_classrooms.csv'), ('schedule_file','ClassSchedule-23_comma.csv')]:
        file_path = os.path.join(app.config['UPLOAD_FOLDER'], file_name[1])
        saved_files[file_name[0]] = file_path

    intervals_dec, buildings, building_codes = \
        analyze(saved_files['buildings_file'], saved_files['central_file'], saved_files['schedule_file'])

    dict_buildings = {code: building.asdict() for code, building in buildings.items()}

    deh120 = dict_buildings['DEH']['rooms']['120']['occupancy_matrix']

    intervals_dec = get_intervals_dec(800, 1600, 30)
    days = DAYS

    open_times = []
    for day_i, day in enumerate(deh120):
        for int_i, occupied in enumerate(day):
            if occupied == 0:
                open_event = {
                    'title': 'DEH - 120 Open',
                    'start': intervals_dec[int_i],
                    'end': intervals_dec[int_i+1] if int_i + 1 < len(intervals_dec) else intervals_dec[int_i] + 30,
                    'dow': days[day_i],
                    'location': 'DEH - 120',
                }
                open_times.append(open_event)

    return str(open_times)
Example #15
def process(img):
    print('<>' * 10, 'Beginning of Process', '<>' * 10)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    imgs = pupil_finder.find(img)
    data = []
    sum_result = 0
    if (not imgs or len(imgs) != 4):
        print('Pupil not found')
        return {'error': 'Pupila não encontrada!'}

    pieces = normalize.four_pieces(imgs)
    for i in range(0, len(pieces)):
        result, piece = analysis.analyze(pieces[i])

        retval, buffer = cv2.imencode('.jpg', piece)
        base64_bytes = base64.b64encode(buffer)
        base64_string = base64_bytes.decode('utf-8')
        sum_result += result
        data.append({'result': round(result, 2), 'img': base64_string})

    avarage = sum_result / 4
    return {'images': data, 'avarage': round(avarage, 2)}


# img = cv2.imread('images/without_diabetes/5d.png')
# img = cv2.imread('images/with_diabetes/5esim.png')
# img = cv2.imread('images/with_diabetes/4dsim.png')
# img = cv2.imread('images/without_diabetes_with_background/3e.png')
# cv2.imshow('img', img)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
# result = process(img)
# print('result: ', result)
Example #16
def main(year, inform):
    df = open_file('../input/suicides.csv')
    data_clean = cleaning(df)
    data_imported = impor(data_clean, "https://restcountries.eu/rest/v2/name/")
    data_filtered = filtering(data_imported, year)
    path, path2, path3, path4 = analyze(data_filtered, year)
    file_to_send = pdf(path, path2, path3, path4, 'Helvetica', year, inform)
    emailing(file_to_send, year)
Example #17
def meow(days=None,
         dates=None,
         limit=None,
         short=None,
         fail=True,
         exclude=None,
         job_type=None,
         down_path=config.DOWNLOAD_PATH,
         periodic=False,
         ):
    """
        This function runs the whole workflow; you can import it anywhere
        and run it with parameters:

    :param days: how many days history to take, usually 7 (week) is enough
    :param dates: specific dates in format ["%m-%d", ..]: ['04-15', '05-02']
    :param limit: limit overall amount of jobs to analyze
    :param short: analyze only this type of jobs,
                    accepts short name: "ha","upgrades","nonha"
    :param fail: whether analyze and print only failed jobs (true by default)
    :param exclude: exclude specific job type: "gate-tripleo-ci-f22-containers"
    :param job_type: include only this job type (like short, but accepts
                        full name): "gate-tripleo-ci-f22-nonha"
    :param down_path: path on local system to save all jobs files there
    :param periodic: if take periodic (periodic=True) or patches (False)
    :return: parsed jobs data, ready for printing to HTML or console
    """
    if not periodic:
        g = Gerrit(period=days)
        #gerrit = g.get_project_patches(config.PROJECTS)
        # Dump gerrit data for investigation
        #with open("/tmp/gerrit", "w") as f:
        #    f.write(json.dumps(gerrit))
        # If debug mode
        with open("/tmp/gerrit", "r") as f:
            gerrit = json.loads(f.read())
        jobs = (job for patch in gerrit for job in Patch(patch).jobs)
    else:
        jobs = (job
                for url in config.PERIODIC_URLS
                for job in Periodic(
                    url, down_path=down_path, limit=limit).jobs)
    f = Filter(
        jobs,
        days=days,
        dates=dates,
        limit=limit,
        short=short,
        fail=fail,
        exclude=exclude,
        job_type=job_type,
        periodic=periodic
    )
    filtered = f.run()
    ready = []
    for job in filtered:
        ready.append(analyze(job, down_path=down_path))
    return ready
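Given the docstring, a typical invocation might look like the following (the values are illustrative only):

# Analyze the last week of failed "ha" jobs from patches (not periodic runs).
jobs_data = meow(days=7, short="ha", fail=True, periodic=False)
for job in jobs_data:
    print(job)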
Example #18
    def index_document(self, document):
        if document.ID not in self.documents:
            self.documents[document.ID] = document
            document.analyze()

        for token in analyze(document.fulltext):
            if token not in self.index:
                self.index[token] = set()
            self.index[token].add(document.ID)
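Here analyze acts as the tokenizer feeding the inverted index. A minimal stand-in, assuming simple lowercase word splitting (the real analyzer may also stem or drop stopwords):

import re

def analyze(text):
    # Minimal tokenizer sketch: lowercase, split on non-word characters,
    # and drop empty tokens.
    return [tok for tok in re.split(r"\W+", text.lower()) if tok]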
Example #19
def run_streaming_simulation(ge_args, m_args, trace_dir, csv_path):
    """
    Runs simulation of using MiDAS
    """
    print("Running simulation with MiDAS...")

    recv_ge = network_model.GEModel(ge_args)
    box = network_model.NetworkBox(recv_ge, m_args.latency,
                                   model_constants.RATE_5Mbps)
    midas = midas_streaming_model.MiDAS(m_args, box)
    if not midas.valid_rate():
        print("not valid set of parameter, skipping")
        return
    print("Parameters are N = {}, B = {}, T = {}".format(
        midas.N, midas.B, midas.T))

    # generate stream of frames to use with 1080 60fps for 5 min
    stream_gen = streams.FixedSizeStream(model_constants.RES_1080P,
                                         model_constants.FPS_60)
    trace_path = "cbr2500.txt"
    # for trace_path in os.listdir(trace_dir):
    #     if ".swp" in trace_path:
    #         continue

    frames = stream_gen.from_trace(
        os.path.abspath(trace_dir + "/" + trace_path))

    # initialize GE models and network box
    midas.box.recv_ge_model = network_model.GEModel(ge_args)

    # use streaming
    print("Using {} frames with size {}...".format(len(frames),
                                                   frames[0].size))
    metrics, loss = midas.transmit_source_blocks(frames)

    # average metrics
    analysis.analyze(metrics,
                     len(frames) * midas.k,
                     "MiDAS",
                     midas,
                     csv_path,
                     latency=midas.box.latency,
                     total_lost=loss)
Example #20
def mainline(train=False):
    datadir = DIR['BASE'] + "data/"
    if train is True:
        featurefile = datadir + 'train-features.txt'
        xmldir = DIR['BASE'] + "demo/train/"
    else:
        featurefile = datadir + 'test-features.txt'
        xmldir = DIR['BASE'] + "demo/test/"
    deleteFiles([featurefile])
    #infile = xmldir + 'C08-1122-parscit-section.xml'
    client_socket = getConnection()
    for infile in glob(xmldir + "*.xml"):
        try:
            print infile + " is being processed."
            if train is True:
                generateTrainFeatures(client_socket, infile, featurefile)
            else:
                generateTestFeatures(client_socket, infile, featurefile)
        except Exception as e:
            print "Some Exception in the main pipeline"
            print (str(type(e)))
            print str(e)
            logging.exception("Something awfull !!")
    model = DIR['DATA'] + "sec-tfidf-model.txt"
    if train is False:
        # Testing
        outfile = DIR['DATA'] + "sec-tfidf-test-out.txt"
        for gamma in [1.0]:
            predictSvm(featurefile, model + str(gamma), outfile)
            outstring = "Testing. Weight : " + str(gamma)
            analyze(featurefile, outfile, outstring)
        #pickleIt()
    else:
        # Training
        outfile = DIR['DATA'] + "sec-tfidf-train-out.txt"
        deleteFiles([outfile])
        for gamma in [1.0]:
            #trainSvm(featurefile, model + str(gamma), gamma)
            trainSvm(featurefile, model, gamma)
            predictSvm(featurefile, model, outfile)
            outstring = "Training. gamma : " + str(gamma)
            analyze(featurefile, outfile, outstring=outstring)
        pickleIt()
Example #21
def main():
    print("archivo?")
    archivo = input()
    input_file = open("./inputs/" + archivo)
    data = input_file.read()
    input_file.close()
    name, characters, keywords, tokens = decomp.main(data)
    final_dfa, dfas = analysis.analyze(name, characters, keywords, tokens)
    to_file.create(final_dfa, dfas, name)
    print("Hecho archivo de nombre ", name)
Example #22
def compile_to_ctree(pysource):
    sys.setrecursionlimit(100000)
    t0 = time.time()

    print 'Generating ctree...'
    raw_ast = parse(pysource)
    annotated_ast = analysis.analyze(raw_ast)
    c_ast = ctree.transform_to_ctree(annotated_ast)
    
    print 'Finished generating ctree'
    print '[elapsed time: %.2f seconds]' % (time.time() - t0)
    return c_ast
Example #24
def main():
    #print "Pandas Version", pd.__version__
    
    symbol_file = sys.argv[1]
    startdate = sys.argv[2]
    enddate = sys.argv[3]
    starting_equity = sys.argv[4]
    benchmark = sys.argv[5]
    '''
    benchmark = sys.argv[5]
    entry_strategy = sys.argv[6]
    exit_strategy = sys.argv[7]
    entry_filter = sys.argv[8]
    exit_filter = sys.argv[9]
    pos_size_strategy = sys.argv[10]
    '''

    # Get Market data from Yahoo files
    #d_data, ls_symbols = marketdata.get_data(startdate, enddate,symbol_file,benchmark)
    #df_prices = d_data['close']
    
    # Get Market data from SQLite database (previously loaded from Yahoo)
    df_prices, ls_symbols = marketdata.get_sqlitedb_data(startdate, enddate, symbol_file, benchmark, 'Close')
    
    
        
    #df_sma = indicators.sma(df_prices,50)
    
    #df_uch, df_lch = indicators.channel(df_prices,50)
    
    #analysis.plot(df_uch.index,df_prices['AAPL'],df_uch['AAPL'],df_lch['AAPL'],df_sma['AAPL'])
    
    # Find Events and create Event profile
    df_bb_events = events.find_bb_events(ls_symbols, df_prices, benchmark)
    #Generate an Event Profile
    simulator.marketsim(100000, 'mydata.csv', 'portval.csv',df_prices)
    
    #Analyze the simulation Results
    analysis.analyze('portval.csv')
Example #25
def mainline(train=False):
    datadir = DIR["BASE"] + "data/"
    if train is True:
        featurefile = datadir + "train-features.txt"
        xmldir = DIR["BASE"] + "demo/train/"
    else:
        featurefile = datadir + "test-features.txt"
        xmldir = DIR["BASE"] + "demo/test/"
    deleteFiles([featurefile])
    # infile = xmldir + 'C08-1122-parscit-section.xml'
    client_socket = getConnection()
    for infile in glob(xmldir + "*.xml"):
        try:
            print infile + " is being processed."
            if train is True:
                generateTrainFeatures(client_socket, infile, featurefile)
            else:
                generateTestFeatures(client_socket, infile, featurefile)
        except Exception as e:
            print "Some Exception in the main pipeline"
            print (str(type(e)))
            print str(e)
            logging.exception("Something awfull !!")
    model = DIR["DATA"] + "sec-tfidf-model.txt"
    if train is False:
        # TESTING
        outfile = DIR["DATA"] + "sec-tfidf-test-out.txt"
        predictSvm(featurefile, model, outfile)
        extractValues(outfile)
        outstring = "Default values Test results"
        analyze(featurefile, outfile, outstring=outstring)
        pickleIt()
    else:
        # TRAINING
        trainSvm(featurefile, model)
        outfile = DIR["DATA"] + "sec-tfidf-train-out.txt"
        predictSvm(featurefile, model, outfile)
        outstring = "Default values"
        analyze(featurefile, outfile, outstring=outstring)
Example #26
def mainline(train=False):
    datadir = DIR['BASE'] + "data/"
    if train is True:
        featurefile = datadir + 'train-features.txt'
        xmldir = DIR['BASE'] + "demo/train/"
    else:
        featurefile = datadir + 'test-features.txt'
        xmldir = DIR['BASE'] + "demo/test/"
    deleteFiles([featurefile])
    #infile = xmldir + 'C08-1122-parscit-section.xml'
    client_socket = getConnection()
    for infile in glob(xmldir + "*.xml"):
        try:
            print infile + " is being processed."
            if train is True:
                generateTrainFeatures(client_socket, infile, featurefile)
            else:
                generateTestFeatures(client_socket, infile, featurefile)
        except Exception as e:
            print "Some Exception in the main pipeline"
            print(str(type(e)))
            print str(e)
            logging.exception("Something awfull !!")
    model = DIR['DATA'] + "sec-tfidf-model.txt"
    if train is False:
        # TESTING
        outfile = DIR['DATA'] + "sec-tfidf-test-out.txt"
        predictSvm(featurefile, model, outfile)
        extractValues(outfile)
        outstring = "Default values Test results"
        analyze(featurefile, outfile, outstring=outstring)
        pickleIt()
    else:
        # TRAINING
        trainSvm(featurefile, model)
        outfile = DIR['DATA'] + "sec-tfidf-train-out.txt"
        predictSvm(featurefile, model, outfile)
        outstring = "Default values"
        analyze(featurefile, outfile, outstring=outstring)
Example #27
def filter():
    saved_files = {}
    for file_name in [('buildings_file','building_abbreviations.csv'), ('central_file','centrally_scheduled_classrooms.csv'), ('schedule_file','ClassSchedule-23_comma.csv')]:
        file_path = os.path.join(app.config['UPLOAD_FOLDER'], file_name[1])
        saved_files[file_name[0]] = file_path

    intervals_dec, buildings, building_codes = \
        analyze(saved_files['buildings_file'], saved_files['central_file'], saved_files['schedule_file'])

    dict_buildings = {code: building.asdict() for code, building in buildings.items()}

    return render_template('filter.html', intervals_dec=intervals_dec, buildings=dict_buildings,
                           building_codes=building_codes)
Example #28
def main():
    #print "Pandas Version", pd.__version__

    symbol_file = sys.argv[1]
    startdate = sys.argv[2]
    enddate = sys.argv[3]
    starting_equity = sys.argv[4]
    benchmark = sys.argv[5]
    '''
    benchmark = sys.argv[5]
    entry_strategy = sys.argv[6]
    exit_strategy = sys.argv[7]
    entry_filter = sys.argv[8]
    exit_filter = sys.argv[9]
    pos_size_strategy = sys.argv[10]
    '''

    # Get Market data from Yahoo files
    #d_data, ls_symbols = marketdata.get_data(startdate, enddate,symbol_file,benchmark)
    #df_prices = d_data['close']

    # Get Market data from SQLite database (previously loaded from Yahoo)
    df_prices, ls_symbols = marketdata.get_sqlitedb_data(
        startdate, enddate, symbol_file, benchmark, 'Close')

    #df_sma = indicators.sma(df_prices,50)

    #df_uch, df_lch = indicators.channel(df_prices,50)

    #analysis.plot(df_uch.index,df_prices['AAPL'],df_uch['AAPL'],df_lch['AAPL'],df_sma['AAPL'])

    # Find Events and create Event profile
    df_bb_events = events.find_bb_events(ls_symbols, df_prices, benchmark)
    #Generate an Event Profile
    simulator.marketsim(100000, 'mydata.csv', 'portval.csv', df_prices)

    #Analyze the simulation Results
    analysis.analyze('portval.csv')
Example #29
def scrape_job(id, link, json_skills):
    skills = pd.read_json(
        json_skills
    )  # Pandas dataframes can't be passed directly into celery tasks
    j = get_job(link)
    j['JobId'] = id
    j['link'] = link
    # jobs_table.put_item(Item=j)
    jobs_table_queue.append(j)
    # print("Passing job to analyze")
    d = analyze(j, skills, analysis_table)
    # if len(d.keys()) == len(d.values()):
    # analysis_df.loc[id] = d
    return d
Example #30
def make_analyze():

    try:
        #Load the data
        data = request.get_json()

    except Exception as e:
        raise e

    if data == {}:
        return (bad_request())
    else:

        #Get the text and the language

        try:
            lang = data['lang']
        except:
            try:
                lang = detect_language(data['text'])
                print(lang)
            except:
                responses = jsonify(
                    "Error in analyze: language field is missing")
                return responses
        try:
            text = data['text']  # we assume text is tokenized
        except:
            responses = jsonify("Error in analyze: text is missing")
            return responses

        if lang not in ['en', 'es', 'ar', 'ro', 'fr']:
            responses = jsonify(
                message=
                "Language not available. Language must be in ['en','es','ar','ro','fr']"
            )
            return responses

        filename = os.path.join(os.path.dirname(__file__),
                                'models-registry.json')
        registry = load_data(filename)

        analysis = analyze(text, lang, registry)
        #print(analysis[0])
        #Send the response codes
        responses = jsonify(concepts=analysis[0],
                            key_ideas=analysis[1],
                            topics=analysis[2])
        responses.status_code = 200
        return responses
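A hedged client-side sketch for exercising this endpoint, assuming it is mounted at /analyze on a local Flask development server (the route and port are assumptions):

import requests

# 'lang' may be omitted to exercise the language-detection fallback above.
resp = requests.post(
    "http://127.0.0.1:5000/analyze",
    json={"text": "Climate change is accelerating.", "lang": "en"},
)
print(resp.status_code, resp.json())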
Example #31
def save_and_segment(doc_id, html, url):
    import suggestor
    title, text, words = analyze(html)
    l = len(words)
    r.hmset('doc:%s'%doc_id, {'title': title, 'text': text, 'len': l, 'url': url})
    r.incrbyfloat('total_len', l)
    for token in words:
        suggestor.add_query(token.word, token.weight)
        r.zadd(u'word:%s'%token.word, token.weight, doc_id)
        r.hmset(u'dw:%s:%s:%s'%(doc_id, token.word, token.fieldname), {
            'pos': token.pos,
            'len': token.len,
            'weight': token.weight,
        })
Example #32
def run_RQ_simulation(ge_args, rq_args, trace_dir, csv_path):
    """
    Runs simulation of using RaptorQ
    """
    # generate stream of frames to use with 1080 60fps for 5 min
    print("Running simulation with RaptorQ...")

    recv_ge = network_model.GEModel(ge_args)
    box = network_model.NetworkBox(recv_ge, rq_args.latency,
                                   model_constants.RATE_5Mbps)
    rq = raptorq_model.RaptorQ(rq_args, box)
    stream_gen = streams.FixedSizeStream(model_constants.RES_1080P,
                                         model_constants.FPS_60)

    trace_path = "cbr2500.txt"
    # for trace_path in os.listdir(trace_dir):
    #     if ".swp" in trace_path:
    #         continue

    frames = stream_gen.from_trace(
        os.path.abspath(trace_dir + "/" + trace_path))

    # initialize new GE models
    rq.box.recv_ge_model = network_model.GEModel(ge_args)

    # use RaptorQ
    print("Using {} frames with size {}...".format(len(frames),
                                                   frames[0].size))
    source_blks, total_symbols = rq.form_source_blocks(frames)
    print("Using {} source blocks...".format(len(source_blks)))
    metrics = []
    for blk in source_blks:
        metrics.append(rq.transmit_source_block(blk, model_constants.RQ_RATE))

    # average metrics
    analysis.analyze(metrics, total_symbols, "RQ", rq, csv_path)
Example #33
def analysis():
    if request.method == 'POST':
        topic = ast.literal_eval(request.form['topic'])
        sources_ids = ast.literal_eval(request.form['sources'])
        source_format_string = formatstring(sources_ids)
        page = ast.literal_eval(request.form['page'])
        fromDate = format_fromDate(ast.literal_eval(request.form['fromDate']))
        toDate = format_toDate(ast.literal_eval(request.form['toDate']))

        app.logger.info('POST analysis: topic(%s), sources_ids(%s)'
                        % ('%s', source_format_string)
                        % tuple([topic] + sources_ids))

        analyze(topic, sources_ids)

        db = g.db
        cur = db.cursor()
        rule_count_dic = get_rule_count_dic(cur, topic, sources_ids, fromDate, toDate)

        from_post_rnum = (page-1)*config['perpage']
        post_ruleset_count_dic = get_post_ruleset_count_dic(cur, topic, sources_ids, from_post_rnum, config['perpage'], fromDate, toDate)

    return jsonify(rule_count_dic           = rule_count_dic,
                   post_ruleset_count_dic   = post_ruleset_count_dic)
Example #34
def main():
    # string = "((abc)|(dξc))*|ani"
    # string = trees.pre(string)
    # tree  = trees.evaluate(string)
    # trees.print2D(tree)
    # dfa = directo.directo(tree, string)
    # #graph.graph(dfa, "prueba")
    # #graph.to_txt(dfa, "prueba")
    # print(evaluate.is_in_language(dfa, "ab"))
    # print("Ingrese archivo ")
    # archivo = input()
    # archivo = open("./inputs/"+archivo)
    archivo = open("./inputs/DoubleAritmetica.ATG")
    data = archivo.read()
    archivo.close()
    name, characters, keywords, tokens, productions = decomp.main(data)
    dfa, dfas, parser = analysis.analyze(name, characters, keywords, tokens, productions)
    to_file.create(dfa, dfas, parser, name)
Example #35
def save_and_segment(doc_id, html, url):
    import suggestor
    title, text, words = analyze(html)
    l = len(words)
    r.hmset('doc:%s' % doc_id, {
        'title': title,
        'text': text,
        'len': l,
        'url': url
    })
    r.incrbyfloat('total_len', l)
    for token in words:
        suggestor.add_query(token.word, token.weight)
        r.zadd(u'word:%s' % token.word, token.weight, doc_id)
        r.hmset(u'dw:%s:%s:%s' % (doc_id, token.word, token.fieldname), {
            'pos': token.pos,
            'len': token.len,
            'weight': token.weight,
        })
Example #36
def respond_analysis(bot, update):
    """
        Listens to each message posted in the chat and if it is above or below
        a given threshold the bot will send a message directed at the user that
        sent the positive or negative message. Also stores each message it receives
        in the groups hashmap. It makes a map for each group chat that contains
        each user and their 10 most recent messages.
    """
    global client, groups
    message = update.message
    text = message.text
    print(text)
    score = analysis.analyze(text, client).score
    if (score >= .5):
        score = "%.4f" % score
        bot_msg = "Whoa {} you are looking pretty happy there with a sentiment score of: {}" \
                    .format(message.from_user.first_name, str(score))
        bot.send_message(chat_id=message.chat_id, text=bot_msg)
    elif (score <= -.5):
        score = "%.3f" % score
        bot_msg = "You gotta calm down {}, you're super mad right now with a sentiment score of : {}" \
                    .format(message.from_user.first_name, str(score))
        bot.send_message(chat_id=message.chat_id, text=bot_msg)
    elif (score == 0.0):
        bot_msg = "I either can't analyze your message or you are extremely neutral {}" \
                    .format(message.from_user.first_name)
        bot.send_message(chat_id=message.chat_id, text=bot_msg)

    chat_id = message.chat_id
    user_id = message.from_user.id

    if chat_id in groups.keys():
        cur_group = groups[chat_id]
    else:
        groups[chat_id] = {}
        cur_group = groups[chat_id]

    if user_id in cur_group.keys():
        messages = cur_group[user_id]
    else:
        cur_group[user_id] = collections.deque(maxlen=10)
        messages = cur_group[user_id]
    messages.append(text)
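Like the /analyze command handler earlier, this callback has to be registered with the bot; with an older python-telegram-bot release that might look roughly like this (an assumption, not the project's actual setup):

from telegram.ext import Updater, MessageHandler, Filters

# Sketch only: route every plain-text message to respond_analysis.
updater = Updater("TELEGRAM_BOT_TOKEN")  # token placeholder
updater.dispatcher.add_handler(MessageHandler(Filters.text, respond_analysis))
updater.start_polling()
updater.idle()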
Example #37
def run(fitter,fitterParams,fitterCoeff,dataClass,label,valid=0.05,
        train="train.csv",test="test.csv",profile=False,nTrials=1,
        force=True,forceFeat=True,plot=False):
    trainFile = train
    testFile = test
    inDir,cacheDir,outDir = getDirsFromCmdLine()
    # add the label for this run (ie: SVM/Boost/LogisticRegression)
    outDir = pGenUtil.ensureDirExists(outDir + label +"/")
    # get the directories we want
    predictDir = pGenUtil.ensureDirExists(outDir + "predictions")
    if (profile and plot):
        profileDir = pGenUtil.ensureDirExists(outDir + "profile")
    else:
        profileDir = None
    # get the data object, by cache or otherwise 
    dataObj = \
    pCheckUtil.pipeline([[cacheDir+'data.pkl',getData,dataClass,outDir,
                          inDir+trainFile,valid,False,profileDir,]],forceFeat)
    return analyze(dataObj,inDir,outDir,testFile,fitter,fitterParams,
                   fitterCoeff,label,dataClass,nTrials,force,plot)
Example #38
sys.path.insert(0, './bayesian')
from bayesian import bayesian
sys.path.insert(0, './SVR')
from svr import svr

start = time.time()
ridge(sys.argv[1],sys.argv[2])
ridge_end = time.time()
lasso(sys.argv[1],sys.argv[2])
lasso_end = time.time()
bayesian(sys.argv[1],sys.argv[2])
bayesian_end = time.time()
svr(sys.argv[1],sys.argv[2])
svr_end = time.time()

print "Ridge Results"
print "Running Time: "+str(ridge_end-start)+" seconds"
analyze("ridge_out.csv")
print "---"
print "Lasso Results"
print "Running Time: "+str(lasso_end-ridge_end)+" seconds"
analyze("lasso_out.csv")
print "---"
print "Bayesian Results"
print "Running Time: "+str(bayesian_end-lasso_end)+" seconds"
analyze("bayesian_out.csv")
print "---"
print "SVR Results"
print "Running Time: "+str(svr_end-bayesian_end)+" seconds"
analyze("svr_out.csv")
Example #39
def test0(number):
	data = tools.loaddir("/home/aditya/Desktop/project/aclImdb/train/pos/")
	data.extend(tools.loaddir("/home/aditya/Desktop/project/aclImdb/train/neg/"))
	print "analyze: "+str(analysis.analyze(open(data[number][0]).read(), debug=True))
	print "analyze: "+data[number][1]
Example #40
    # print(tagged_sents)

print(len(sents))

parsers = [nltk.RegexpParser(grammar1), nltk.RegexpParser(grammar2), nltk.RegexpParser(grammar3)]

# Dict to hold all found facts
results = {"FACT1": [], "FACT2": [], "FACT3": [], "FACT4": [], "FACT5": [], "FACT6": []}

# For all sentences
for sent in sents:
    # Tag with POS-tags
    tagged_sent = nltk.pos_tag(sent)
    for parser in parsers: # Run each parser
        parsed_sent = parser.parse(tagged_sent)
        results2 = analyze(parsed_sent)
        for result in results2["FACT1"]:
            results["FACT1"].append(result)
        for result in results2["FACT2"]:
            results["FACT2"].append(result)
        for result in results2["FACT3"]:
            results["FACT3"].append(result)
        for result in results2["FACT4"]:
            results["FACT4"].append(result)
        for result in results2["FACT5"]:
            results["FACT5"].append(result)
        for result in results2["FACT6"]:
            results["FACT6"].append(result)

# Find contradictions for FACT1
for result in results["FACT1"]:
Example #41
        set.append(curr)
        bucket.remove(curr)
    all_sets.append(set)

for i in range(7):
    test_set = all_sets[i]
    train_set = []
    for set in [all_sets[z] for z in range(7) if z != i]:
        train_set.extend(set)
    for key in train_set:
        writeToFile(featurefile, data[key]['features'] + '\n', 'a')
    trainSvm(featurefile, model, gamma=1)
    predictSvm(featurefile, model, outfile)
    outstring = "Training Fold : " + str(i)
    print "************* " + outstring + " *************"
    analyze(featurefile, outfile, resfile, outstring)

    deleteFiles([featurefile, outfile])

    for key in test_set:
        writeToFile(featurefile, data[key]['features'] + '\n', 'a')
    predictSvm(featurefile, model, outfile)
    outstring = "Testing Fold : " + str(i)
    pre, rec = analyze(featurefile, outfile, resfile, outstring)
    precision.append(pre)
    recall.append(rec)

print precision
print sum(precision) / float(len(precision))
print recall
print sum(recall) / float(len(recall))
Example #42
 def tokenize(self, text):
     return analysis.analyze(text)
Example #43
def get_results(testcases):
    for i, testcase in enumerate(testcases):
        text, expected = testcase
        result = analysis.analyze(text)
        yield (i + 1, expected, result['classification'],
               result['polarity'], result['emotions'], text)
Example #44
 def analyze(self, args):
     from pprint import pprint
     for date in args.date:
         path = self.get_path(date)
         pprint(analysis.analyze(path))