Example 1
def main():
    if len(sys.argv) == 2:
        jar_path = sys.argv[1]
    else:
        print('[ERROR] jar path must be provided!')
        return
    extractor = Extractor(jar_path)
    extractor.start()
Example 2
def main():
    if len(sys.argv) == 2:
        jar_path = sys.argv[1]
    else:
        print('[ERROR] jar path must be provided!')
        return
    extractor = Extractor(jar_path)
    extractor.start()
Example 3
def collect(user: str, date_range: DateRange) -> Extractor:
    data = []
    for year in date_range.iter_year():
        url = f"https://github.com/{user}"
        params = {
            "from": f"{year}-01-01",
        }

        text = Request.fetch_text(url, params)
        rects = BeautifulSoup(text, "html.parser").find_all("rect")

        for rect in rects:
            data_date = rect["data-date"]
            data_count = rect["data-count"]

            if date.fromisoformat(data_date) in date_range:
                data.append(
                    [
                        pd.Timestamp(data_date),
                        pd.to_numeric(data_count),
                    ]
                )

    return Extractor(
        user=user,
        df=pd.DataFrame(
            data=data,
            columns=["date", "count"],
        ),
    )
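As a small offline illustration of the DataFrame that collect builds from the scraped (date, count) pairs (the values below are made up; only the column layout follows the code above):

import pandas as pd

# Made-up sample of scraped (date, count) pairs; real data comes from the GitHub page.
data = [
    [pd.Timestamp("2021-01-01"), pd.to_numeric("3")],
    [pd.Timestamp("2021-01-02"), pd.to_numeric("0")],
]
df = pd.DataFrame(data=data, columns=["date", "count"])
print(df.dtypes)  # date: datetime64[ns], count: int64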
Example 4
def extract_features():
    # Get the dataset.
    data = DataSet()

    # Get the model.
    model = Extractor(SAVED_CNN_EXTRACTOR_MODEL)

    if not os.path.exists(PROCESSED_SEQUENCES_DATA_DIR):
        os.makedirs(PROCESSED_SEQUENCES_DATA_DIR)

    # Loop through data.
    folders = ['train', 'test']
    #     folders = ['train']
    for folder in folders:
        print(f'Extracting features from {folder} videos...')
        video_filenames = list(data.data[folder].keys())
        #         video_filenames=['171']
        pbar = tqdm(total=len(video_filenames))
        for video_filename in video_filenames:

            # Get the path to the sequence for this video.
            path = os.path.join(PROCESSED_SEQUENCES_DATA_DIR, video_filename +
                                '-features')  # numpy will auto-append .npy

            # Check if we already have it.
            if os.path.isfile(path + '.npy'):
                pbar.update(1)
                continue

            # Get the frames for this video.
            frames = data.get_frames_paths(folder, video_filename)

            # Now loop through and extract features to build the sequence.
            sequence = []
            for image in frames:
                features = model.extract(image)
                sequence.append(features)

            # Save the sequence.
            np.save(path, sequence)

            pbar.update(1)

        pbar.close()
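As a companion sketch, one way the saved feature files could be read back with np.load; the directory value and video name here are placeholders, and only the '-features' plus '.npy' naming follows the code above.

import os
import numpy as np

# Placeholder values for illustration only; the real constant and filenames come from the project.
PROCESSED_SEQUENCES_DATA_DIR = 'data/processed/sequences'
video_filename = '171'

path = os.path.join(PROCESSED_SEQUENCES_DATA_DIR, video_filename + '-features.npy')
sequence = np.load(path)  # one CNN feature vector per frame
print(sequence.shape)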
Example 5
    def run(self):
        # TODO: Uncomment to uncompress and move the compressed events JSON file
        # self._uncompress_events()

        # TODO: Uncomment to delete the compressed events JSON file
        # os.remove(src_gz_file)

        extractor = Extractor(self.src_db_config)
        df_plans = extractor.export_table_to_df(self.table_plans)

        transformer = Transformer()
        df_events = transformer.create_events_info_df_from_file(
            self.src_events)
        df_purchase = transformer.create_purchase_detail_df_from_df(
            df_events, df_plans)

        loader = Loader(db_name='test_fever')

        events_json = df_events.to_dict(orient="records")
        # loader.delete_many(collection_name=self.mongo_db_events, json_query={}) # Test

        try:
            loader.insert_many(collection_name=self.mongo_db_events,
                               json_list=events_json)
        except pymongo.errors.BulkWriteError:
            loader.upsert_many_one_by_one(collection_name=self.mongo_db_events,
                                          json_list=events_json)

        purchase_json = df_purchase.to_dict(orient="records")

        # loader.delete_many(collection_name=self.mongo_db_purchase, json_query={}) # Test
        try:
            loader.insert_many(collection_name=self.mongo_db_purchase,
                               json_list=purchase_json)
        except pymongo.errors.BulkWriteError:
            loader.upsert_many_one_by_one(
                collection_name=self.mongo_db_purchase,
                json_list=purchase_json)
Example 6
class TestExtractor(unittest.TestCase):
    def setUp(self):
        src_db_config = "/opt/repos/plan-test/config/db_config.json"
        self.extractor = Extractor(src_db_config)

    def test_list_data_bases(self):
        res = self.extractor.list_data_bases()
        self.assertTrue("information_schema" in res)
        self.assertTrue("fevertest" in res)

    def test_list_tables(self):
        res = self.extractor.list_tables()
        self.assertTrue("fever_plans" in res)

    def test_execute(self):
        query = "SELECT * FROM fever_plans"
        result = self.extractor.execute(query)
        self.assertEqual(552, result.rowcount)

    def test_export_table_to_csv(self):
        table = "fever_plans"
        csv_dst = "/opt/repos/plan-test/test/out/fever_plans.csv"
        self.extractor.export_table_to_csv(table, csv_dst)
Example 7
def __writer() -> Writer:
    return Writer(
        extractor=Extractor(
            user="******",
            df=pd.DataFrame(
                {
                    "count": [0],
                    "date": pd.date_range(
                        start="2010-12-25",
                        end="2010-12-25",
                    ),
                }
            ),
        ),
        skeleton_string_map={
            "header-section": "header-section {user}",
            "repository": "repository {link}",
            "repository-title": "repository-title",
            "issue": "issue {link}",
            "issue-title": "issue-title",
            "summary-section": "summary-section",
            "today": "today {date} {count} {length}",
            "today-peak": "today-peak {start} {length}",
            "max": "max {date} {count}",
            "max-peak": "max-peak {start} {end} {length}",
            "total": "total {sum} {avg}",
            "graph-section": "graph-section",
            "count-sum-recent": "count-sum-recent",
            "count-sum-full": "count-sum-full",
            "dayofweek-sum-recent": "dayofweek-sum-recent",
            "dayofweek-mean-full": "dayofweek-mean-full",
            "month-sum-recent": "month-sum-recent",
            "year-sum-full": "year-sum-full",
            "contribution-count": "contribution-count",
            "day": "day",
            "dayofweek": "dayofweek",
            "month": "month",
            "year": "year",
        },
        skeleton_list_map={
            "dayofweek": ["zero", "one", "two", "three", "four", "five", "six"],
            "month": ["zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "eleven"],
        },
    )
Example 8
    SAVED_CNN_EXTRACTOR_MODEL, SAVED_RNN_MODEL
from src.data import DataSet
from src.extractor import Extractor
from src.utils import VideoHelper


def prepare_sequence_for_rnn(sequence):
    # Slide a window of RNN_WINDOW_SIZE consecutive feature vectors over the
    # sequence, producing one overlapping window per step as RNN input.
    rnn_input = []
    for r in range(RNN_WINDOW_SIZE, len(sequence)):
        l = r - RNN_WINDOW_SIZE
        window = np.asarray(sequence[l:r])
        rnn_input.append(window)
    return np.asarray(rnn_input)


cnn_extractor_model = Extractor(SAVED_CNN_EXTRACTOR_MODEL)
rnn_model = load_model(SAVED_RNN_MODEL,
                       custom_objects={
                           'ccc_loss': metrics.ccc_loss,
                           'rmse': metrics.rmse,
                           'rmse_v': metrics.rmse_v,
                           'rmse_a': metrics.rmse_a,
                           'cc_v': metrics.cc_v,
                           'cc_a': metrics.cc_a,
                           'ccc_v': metrics.ccc_v,
                           'ccc_a': metrics.ccc_a
                       })

video_helper = VideoHelper()
private_test_video_filenames = video_helper.get_private_test_video_filenames()
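For clarity, a self-contained illustration of the window shapes produced by prepare_sequence_for_rnn; the window size and feature dimension below are assumed values for demonstration only.

import numpy as np

RNN_WINDOW_SIZE = 16  # assumed value; the real constant is imported from the project config


def prepare_sequence_for_rnn(sequence):
    # Same sliding-window logic as above, repeated here so the sketch runs standalone.
    rnn_input = []
    for r in range(RNN_WINDOW_SIZE, len(sequence)):
        window = np.asarray(sequence[r - RNN_WINDOW_SIZE:r])
        rnn_input.append(window)
    return np.asarray(rnn_input)


# 100 per-frame feature vectors of length 2048 yield 100 - 16 = 84 overlapping windows.
features = np.zeros((100, 2048))
print(prepare_sequence_for_rnn(features).shape)  # (84, 16, 2048)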
Example 9
# input: an English sentence
if __name__ == '__main__':
  sentence = ''
  if len(sys.argv) == 3:
    experiment_dir = sys.argv[1]
    sentence = sys.argv[2]
  else:
    # expected usage: <experiment_dir> <sentence>
    assert False

  # load config
  _dir = os.path.dirname(os.path.abspath(__file__))
  config = SMTSemparseConfig(_dir+'/settings.yaml', _dir+'/dependencies.yaml')

  # stem the input sentence
  sentence = Extractor(config).preprocess_nl(sentence)

  # we need a temp dir!
  temp_dir = tempfile.mkdtemp()

  # decode with Moses
  moses = Moses(config)
  moses.decode_sentence(experiment_dir, sentence, temp_dir)

  # convert the decoder output to a bracket structure
  print(Functionalizer(config).run_sentence(experiment_dir, temp_dir))

  # delete temp files
  shutil.rmtree(temp_dir)

Example 10
    def setUp(self):
        src_db_config = "/opt/repos/plan-test/config/db_config.json"
        self.extractor = Extractor(src_db_config)